// AsmJit - Machine code generation for C++
//
//  * Official AsmJit Home Page: https://asmjit.com
//  * Official Github Repository: https://github.com/asmjit/asmjit
//
// Copyright (c) 2008-2020 The AsmJit Authors
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
//    claim that you wrote the original software. If you use this software
//    in a product, an acknowledgment in the product documentation would be
//    appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
//    misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.

#ifndef ASMJIT_TEST_OPCODE_H_INCLUDED
#define ASMJIT_TEST_OPCODE_H_INCLUDED

#include <asmjit/x86.h>

namespace asmtest {

// Generate all instructions asmjit can emit.
static void generateOpcodes(asmjit::x86::Emitter* e, bool useRex1 = false, bool useRex2 = false) {
  using namespace asmjit;
  using namespace asmjit::x86;

  bool isX64 = e->is64Bit();

  // Prevent a crash when the generated function is called to see the disassembly.
  e->ret();

  // All instructions use the following registers, which can be changed to see
  // if `x86::Assembler` can properly encode all possible combinations. If the
  // `useRex1` argument is true, the `A` registers will in most cases have index
  // 8 or greater to force a REX prefix; `useRex2` does the same for the `B`,
  // `C`, and `D` registers.
  Gp gLoA = useRex1 ? r8b : al;
  Gp gLoB = useRex2 ? r9b : bl;

  Gp gHiA = ah;
  Gp gHiB = bh;

  Gp gwA = useRex1 ? r8w  : ax;
  Gp gwB = useRex2 ? r9w  : bx;

  Gp gdA = useRex1 ? r8d  : eax;
  Gp gdB = useRex2 ? r9d  : ebx;
  Gp gdC = useRex2 ? r10d : ecx;

  Gp gzA = useRex1 ? r8.as<Gp>()  : e->zax();
  Gp gzB = useRex2 ? r9.as<Gp>()  : e->zbx();
  Gp gzC = useRex2 ? r10.as<Gp>() : e->zcx();
  Gp gzD = useRex2 ? r11.as<Gp>() : e->zdx();

  KReg kA = k1;
  KReg kB = k2;
  KReg kC = k3;

  Mem anyptr_gpA = ptr(gzA);
  Mem anyptr_gpB = ptr(gzB);
  Mem anyptr_gpC = ptr(gzC);
  Mem anyptr_gpD = ptr(gzD);

  Mem intptr_gpA = e->intptr_ptr(gzA);
  Mem intptr_gpB = e->intptr_ptr(gzB);

  St stA = st0;
  St stB = st7;

  Mm mmA = mm0;
  Mm mmB = mm1;

  Xmm xmmA = useRex1 ? xmm8  : xmm0;
  Xmm xmmB = useRex2 ? xmm9  : xmm1;
  Xmm xmmC = useRex2 ? xmm10 : xmm2;
  Xmm xmmD = useRex2 ? xmm11 : xmm3;

  Ymm ymmA = useRex1 ? ymm8  : ymm0;
  Ymm ymmB = useRex2 ? ymm9  : ymm1;
  Ymm ymmC = useRex2 ? ymm10 : ymm2;
  Ymm ymmD = useRex2 ? ymm11 : ymm3;

  Zmm zmmA = useRex1 ? zmm8  : zmm0;
  Zmm zmmB = useRex2 ? zmm9  : zmm1;
  Zmm zmmC = useRex2 ? zmm10 : zmm2;

  Mem vx_ptr = ptr(gzB, xmmB);
  Mem vy_ptr = ptr(gzB, ymmB);
  Mem vz_ptr = ptr(gzB, zmmB);

  Label L;

  // Base.
  e->adc(gLoA, 1);
  e->adc(gLoB, 1);
  e->adc(gHiA, 1);
  e->adc(gHiB, 1);
  e->adc(gwA, 1);
  e->adc(gwB, 1);
  e->adc(gdA, 1);
  e->adc(gdB, 1);
  e->adc(gzA, 1);
  e->adc(gzA, gzB);
  e->adc(gzA, intptr_gpB);
  e->adc(intptr_gpA, 1);
  e->adc(intptr_gpA, gzB);
  e->add(gLoA, 1);
  e->add(gLoB, 1);
  e->add(gHiA, 1);
  e->add(gHiB, 1);
  e->add(gwA, 1);
  e->add(gwB, 1);
  e->add(gdA, 1);
  e->add(gdB, 1);
  e->add(gzA, 1);
  e->add(gzA, gzB);
  e->add(gzA, intptr_gpB);
  e->add(intptr_gpA, 1);
  e->add(intptr_gpA, gzB);
  e->and_(gLoA, 1);
  e->and_(gLoB, 1);
  e->and_(gHiA, 1);
  e->and_(gHiB, 1);
  e->and_(gwA, 1);
  e->and_(gwB, 1);
  e->and_(gdA, 1);
  e->and_(gdB, 1);
  e->and_(gzA, 1);
  e->and_(gzA, gzB);
  e->and_(gzA, intptr_gpB);
  e->and_(intptr_gpA, 1);
  e->and_(intptr_gpA, gzB);
  e->bswap(gzA);
  e->bt(gdA, 1);
  e->bt(gzA, 1);
  e->bt(gdA, gdB);
  e->bt(gzA, gzB);
  e->bt(intptr_gpA, 1);
  e->bt(anyptr_gpA, gdB);
  e->bt(intptr_gpA, gzB);
  e->btc(gdA, 1);
  e->btc(gzA, 1);
  e->btc(gdA, gdB);
  e->btc(gzA, gzB);
  e->btc(intptr_gpA, 1);
  e->btc(anyptr_gpA, gdB);
  e->btc(intptr_gpA, gzB);
  e->btr(gdA, 1);
  e->btr(gzA, 1);
  e->btr(gdA, gdB);
  e->btr(gzA, gzB);
  e->btr(intptr_gpA, 1);
  e->btr(anyptr_gpA, gdB);
  e->btr(intptr_gpA, gzB);
  e->bts(gdA, 1);
  e->bts(gzA, 1);
  e->bts(gdA, gdB);
  e->bts(gzA, gzB);
  e->bts(intptr_gpA, 1);
  e->bts(anyptr_gpA, gdB);
  e->bts(intptr_gpA, gzB);
  e->call(gzA);
  e->call(intptr_gpA);
  e->cbw();                                 // Implicit AX      <- Sign Extend AL.
  e->cbw(ax);                               // Explicit AX      <- Sign Extend AL.
  e->cdq();                                 // Implicit EDX:EAX <- Sign Extend EAX.
  e->cdq(edx, eax);                         // Explicit EDX:EAX <- Sign Extend EAX.
  if (isX64) e->cdqe();                     // Implicit RAX     <- Sign Extend EAX.
  if (isX64) e->cdqe(eax);                  // Explicit RAX     <- Sign Extend EAX.
  e->cwd();                                 // Implicit DX:AX   <- Sign Extend AX.
  e->cwd(dx, ax);                           // Explicit DX:AX   <- Sign Extend AX.
  e->cwde();                                // Implicit EAX     <- Sign Extend AX.
  e->cwde(eax);                             // Explicit EAX     <- Sign Extend AX.
  if (isX64) e->cqo();                      // Implicit RDX:RAX <- Sign Extend RAX.
  if (isX64) e->cqo(rdx, rax);              // Explicit RDX:RAX <- Sign Extend RAX.
  e->clc();
  e->cld();
  e->cmc();
  e->cmp(gLoA, 1);
  e->cmp(gLoB, 1);
  e->cmp(gHiA, 1);
  e->cmp(gHiB, 1);
  e->cmp(gwA, 1);
  e->cmp(gwB, 1);
  e->cmp(gdA, 1);
  e->cmp(gdB, 1);
  e->cmp(gzA, 1);
  e->cmp(gLoA, gLoB);
  e->cmp(gHiA, gHiB);
  e->cmp(gwA, gwB);
  e->cmp(gdA, gdB);
  e->cmp(gzA, gzB);
  e->cmp(gdA, anyptr_gpB);
  e->cmp(gzA, intptr_gpB);
  e->cmp(intptr_gpA, 1);
  e->cmp(anyptr_gpA, gdB);
  e->cmp(intptr_gpA, gzB);
  e->cmpxchg(gdA, gdB);                     // Implicit regA, regB, <EAX>
  e->cmpxchg(gzA, gzB);                     // Implicit regA, regB, <ZAX>
  e->cmpxchg(gdA, gdB, eax);                // Explicit regA, regB, <EAX>
  e->cmpxchg(gzA, gzB, e->zax());           // Explicit regA, regB, <ZAX>
  e->cmpxchg(anyptr_gpA, gdB);              // Implicit mem , regB, <EAX>
  e->cmpxchg(anyptr_gpA, gzB);              // Implicit mem , regB, <ZAX>
  e->cmpxchg(anyptr_gpA, gdB, eax);         // Explicit mem , regB, <EAX>
  e->cmpxchg(anyptr_gpA, gzB, e->zax());    // Explicit mem , regB, <ZAX>
  e->cmpxchg8b(anyptr_gpA);                 // Implicit mem , <EDX>, <EAX>, <ECX>, <EBX>
  e->cmpxchg8b(anyptr_gpA,
               x86::edx, x86::eax,
               x86::ecx, x86::ebx);         // Explicit mem , <EDX>, <EAX>, <ECX>, <EBX>
  if (isX64) e->cmpxchg16b(anyptr_gpA);     // Implicit mem , <RDX>, <RAX>, <RCX>, <RBX>
  if (isX64) e->cmpxchg16b(anyptr_gpA,
               x86::rdx, x86::rax,
               x86::rcx, x86::rbx);         // Explicit mem , <EDX>, <EAX>, <ECX>, <EBX>
  e->cpuid();                               // Implicit <EAX>, <EBX>, <ECX>, <EDX>
  e->cpuid(eax, ebx, ecx, edx);             // Explicit <EAX>, <EBX>, <ECX>, <EDX>
  e->crc32(gdA, byte_ptr(gzB));
  e->crc32(gdA, word_ptr(gzB));
  e->crc32(gdA, dword_ptr(gzB));
  if (isX64) e->crc32(gdA, qword_ptr(gzB));
  if (isX64) e->crc32(gzA, qword_ptr(gzB));
  e->dec(gLoA);
  e->dec(gHiA);
  e->dec(gwA);
  e->dec(gdA);
  e->dec(gzA);
  e->dec(intptr_gpA);
  e->inc(gLoA);
  e->inc(gwA);
  e->inc(gdA);
  e->inc(gzA);
  e->inc(intptr_gpA);
  e->int_(13);
  e->int3();
  e->into();
  e->lea(gzA, intptr_gpB);
  e->mov(gLoA, 1);
  e->mov(gHiA, 1);
  e->mov(gwA, 1);
  e->mov(gdA, 1);
  e->mov(gzA, 1);
  e->mov(gLoA, gLoB);
  e->mov(gHiA, gHiB);
  e->mov(gwA, gwB);
  e->mov(gdA, gdB);
  e->mov(gzA, gzB);
  e->mov(gLoA, anyptr_gpB);
  e->mov(gwA, anyptr_gpB);
  e->mov(gdA, anyptr_gpB);
  e->mov(gzA, intptr_gpB);
  e->mov(anyptr_gpA, gLoB);
  e->mov(anyptr_gpA, gwB);
  e->mov(anyptr_gpA, gdB);
  e->mov(intptr_gpA, 1);
  e->mov(intptr_gpA, gzB);
  e->movsx(gzA, gLoB);
  e->movsx(gzA, byte_ptr(gzB));
  e->movzx(gzA, gLoB);
  e->movzx(gzA, byte_ptr(gzB));
  e->movbe(gzA, anyptr_gpB);
  e->movbe(anyptr_gpA, gzB);
  e->neg(gzA);
  e->neg(intptr_gpA);
  e->nop();
  e->not_(gzA);
  e->not_(intptr_gpA);
  e->or_(gLoA, 1);
  e->or_(gLoB, 1);
  e->or_(gHiA, 1);
  e->or_(gHiB, 1);
  e->or_(gwA, 1);
  e->or_(gwB, 1);
  e->or_(gdA, 1);
  e->or_(gdB, 1);
  e->or_(gzA, 1);
  e->or_(gzA, gzB);
  e->or_(gzA, intptr_gpB);
  e->or_(intptr_gpA, 1);
  e->or_(intptr_gpA, gzB);
  e->pop(gzA);
  e->pop(intptr_gpA);
  if (!isX64) e->popa();
  if (!isX64) e->popad();
  e->popf();
  if (!isX64) e->popfd();
  if ( isX64) e->popfq();
  e->push(gzA);
  e->push(intptr_gpA);
  e->push(0);
  if (!isX64) e->pusha();
  if (!isX64) e->pushad();
  e->pushf();
  if (!isX64) e->pushfd();
  if ( isX64) e->pushfq();
  e->rcl(gdA, 0);
  e->rcl(gzA, 0);
  e->rcl(gdA, 1);
  e->rcl(gzA, 1);
  e->rcl(gdA, cl);
  e->rcl(gzA, cl);
  e->rcl(intptr_gpA, 0);
  e->rcl(intptr_gpA, 1);
  e->rcl(intptr_gpA, cl);
  e->rcr(gdA, 0);
  e->rcr(gzA, 0);
  e->rcr(gdA, 1);
  e->rcr(gzA, 1);
  e->rcr(gdA, cl);
  e->rcr(gzA, cl);
  e->rcr(intptr_gpA, 0);
  e->rcr(intptr_gpA, 1);
  e->rcr(intptr_gpA, cl);
  e->rdtsc();                               // Implicit <EDX:EAX>
  e->rdtsc(edx, eax);                       // Explicit <EDX:EAX>
  e->rdtscp();                              // Implicit <EDX:EAX>, <ECX>
  e->rdtscp(edx, eax, ecx);                 // Implicit <EDX:EAX>, <ECX>
  e->ret();
  e->ret(0);
  e->rol(gdA, 0);
  e->rol(gzA, 0);
  e->rol(gdA, 1);
  e->rol(gzA, 1);
  e->rol(gdA, cl);
  e->rol(gzA, cl);
  e->rol(intptr_gpA, 0);
  e->rol(intptr_gpA, 1);
  e->rol(intptr_gpA, cl);
  e->ror(gdA, 0);
  e->ror(gzA, 0);
  e->ror(gdA, 1);
  e->ror(gzA, 1);
  e->ror(gdA, cl);
  e->ror(gzA, cl);
  e->ror(intptr_gpA, 0);
  e->ror(intptr_gpA, 1);
  e->ror(intptr_gpA, cl);
  e->sbb(gLoA, 1);
  e->sbb(gLoB, 1);
  e->sbb(gHiA, 1);
  e->sbb(gHiB, 1);
  e->sbb(gwA, 1);
  e->sbb(gwB, 1);
  e->sbb(gdA, 1);
  e->sbb(gdB, 1);
  e->sbb(gzA, 1);
  e->sbb(gzA, gzB);
  e->sbb(gzA, intptr_gpB);
  e->sbb(intptr_gpA, 1);
  e->sbb(intptr_gpA, gzB);
  e->sal(gdA, 0);
  e->sal(gzA, 0);
  e->sal(gdA, 1);
  e->sal(gzA, 1);
  e->sal(gdA, cl);
  e->sal(gzA, cl);
  e->sal(intptr_gpA, 0);
  e->sal(intptr_gpA, 1);
  e->sal(intptr_gpA, cl);
  e->sar(gdA, 0);
  e->sar(gzA, 0);
  e->sar(gdA, 1);
  e->sar(gzA, 1);
  e->sar(gdA, cl);
  e->sar(gzA, cl);
  e->sar(intptr_gpA, 0);
  e->sar(intptr_gpA, 1);
  e->sar(intptr_gpA, cl);
  e->shl(gdA, 0);
  e->shl(gzA, 0);
  e->shl(gdA, 1);
  e->shl(gzA, 1);
  e->shl(gdA, cl);
  e->shl(gzA, cl);
  e->shl(intptr_gpA, 0);
  e->shl(intptr_gpA, 1);
  e->shl(intptr_gpA, cl);
  e->shr(gdA, 0);
  e->shr(gzA, 0);
  e->shr(gdA, 1);
  e->shr(gzA, 1);
  e->shr(gdA, cl);
  e->shr(gzA, cl);
  e->shr(intptr_gpA, 0);
  e->shr(intptr_gpA, 1);
  e->shr(intptr_gpA, cl);
  e->shld(gdA, gdB, 0);
  e->shld(gzA, gzB, 0);
  e->shld(gdA, gdB, cl);
  e->shld(gzA, gzB, cl);
  e->shld(anyptr_gpA, gdB, 0);
  e->shld(intptr_gpA, gzB, 0);
  e->shld(anyptr_gpA, gdB, cl);
  e->shld(intptr_gpA, gzB, cl);
  e->shrd(gdA, gdB, 0);
  e->shrd(gzA, gzB, 0);
  e->shrd(gdA, gdB, cl);
  e->shrd(gzA, gzB, cl);
  e->shrd(anyptr_gpA, gdB, 0);
  e->shrd(intptr_gpA, gzB, 0);
  e->shrd(anyptr_gpA, gdB, cl);
  e->shrd(intptr_gpA, gzB, cl);
  e->stc();
  e->std();
  e->sti();
  e->sub(gLoA, 1);
  e->sub(gLoB, 1);
  e->sub(gHiA, 1);
  e->sub(gHiB, 1);
  e->sub(gwA, 1);
  e->sub(gwB, 1);
  e->sub(gdA, 1);
  e->sub(gdB, 1);
  e->sub(gzA, 1);
  e->sub(gzA, gzB);
  e->sub(gzA, intptr_gpB);
  e->sub(intptr_gpA, 1);
  e->sub(intptr_gpA, gzB);
  e->swapgs();
  e->test(gzA, 1);
  e->test(gzA, gzB);
  e->test(intptr_gpA, 1);
  e->test(intptr_gpA, gzB);
  e->ud2();
  e->xadd(gzA, gzB);
  e->xadd(intptr_gpA, gzB);
  e->xchg(gzA, gzB);
  e->xchg(intptr_gpA, gzB);
  e->xchg(gzA, intptr_gpB);
  e->xor_(gLoA, 1);
  e->xor_(gLoB, 1);
  e->xor_(gHiA, 1);
  e->xor_(gHiB, 1);
  e->xor_(gwA, 1);
  e->xor_(gwB, 1);
  e->xor_(gdA, 1);
  e->xor_(gdB, 1);
  e->xor_(gzA, 1);
  e->xor_(gzA, gzB);
  e->xor_(gzA, intptr_gpB);
  e->xor_(intptr_gpA, 1);
  e->xor_(intptr_gpA, gzB);

  // Special case - div|mul.
  e->div(cl);                               // Implicit AH:AL <- AX * r8
  e->div(byte_ptr(gzA));                    // Implicit AH:AL <- AX * m8
  e->div(ax, cl);                           // Explicit AH:AL <- AX * r8
  e->div(ax, anyptr_gpA);                   // Explicit AH:AL <- AX * m8

  e->div(cx);                               // Implicit DX:AX <- DX:AX * r16
  e->div(word_ptr(gzA));                    // Implicit DX:AX <- DX:AX * m16
  e->div(dx, ax, cx);                       // Explicit DX:AX <- DX:AX * r16
  e->div(dx, ax, anyptr_gpA);               // Explicit DX:AX <- DX:AX * m16

  e->div(ecx);                              // Implicit EDX:EAX <- EDX:EAX * r32
  e->div(dword_ptr(gzA));                   // Implicit EDX:EAX <- EDX:EAX * m32
  e->div(edx, eax, ecx);                    // Explicit EDX:EAX <- EDX:EAX * r32
  e->div(edx, eax, anyptr_gpA);             // Explicit EDX:EAX <- EDX:EAX * m32

  if (isX64) e->div(rcx);                   // Implicit RDX|RAX <- RDX:RAX * r64
  if (isX64) e->div(qword_ptr(gzA));        // Implicit RDX|RAX <- RDX:RAX * m64
  if (isX64) e->div(rdx, rax, rcx);         // Explicit RDX|RAX <- RDX:RAX * r64
  if (isX64) e->div(rdx, rax, anyptr_gpA);  // Explicit RDX|RAX <- RDX:RAX * m64

  e->idiv(cl);                              // Implicit AH:AL <- AX * r8
  e->idiv(byte_ptr(gzA));                   // Implicit AH:AL <- AX * m8
  e->idiv(ax, cl);                          // Explicit AH:AL <- AX * r8
  e->idiv(ax, anyptr_gpA);                  // Explicit AH:AL <- AX * m8

  e->idiv(cx);                              // Implicit DX:AX <- DX:AX * r16
  e->idiv(word_ptr(gzA));                   // Implicit DX:AX <- DX:AX * m16
  e->idiv(dx, ax, cx);                      // Explicit DX:AX <- DX:AX * r16
  e->idiv(dx, ax, anyptr_gpA);              // Explicit DX:AX <- DX:AX * m16

  e->idiv(ecx);                             // Implicit EDX:EAX <- EDX:EAX * r32
  e->idiv(dword_ptr(gzA));                  // Implicit EDX:EAX <- EDX:EAX * m32
  e->idiv(edx, eax, ecx);                   // Explicit EDX:EAX <- EDX:EAX * r32
  e->idiv(edx, eax, anyptr_gpA);            // Explicit EDX:EAX <- EDX:EAX * m32

  if (isX64) e->idiv(rcx);                  // Implicit RDX|RAX <- RDX:RAX * r64
  if (isX64) e->idiv(qword_ptr(gzA));       // Implicit RDX|RAX <- RDX:RAX * m64
  if (isX64) e->idiv(rdx, rax, rcx);        // Explicit RDX|RAX <- RDX:RAX * r64
  if (isX64) e->idiv(rdx, rax, anyptr_gpA); // Explicit RDX|RAX <- RDX:RAX * m64

  e->mul(cl);                               // Implicit AX <- AL * r8
  e->mul(byte_ptr(gzA));                    // Implicit AX <- AL * m8
  e->mul(ax, cl);                           // Explicit AX <- AL * r8
  e->mul(ax, anyptr_gpA);                   // Explicit AX <- AL * m8

  e->mul(cx);                               // Implicit DX:AX <- AX * r16
  e->mul(word_ptr(gzA));                    // Implicit DX:AX <- AX * m16
  e->mul(dx, ax, cx);                       // Explicit DX:AX <- AX * r16
  e->mul(dx, ax, anyptr_gpA);               // Explicit DX:AX <- AX * m16

  e->mul(ecx);                              // Implicit EDX:EAX <- EAX * r32
  e->mul(dword_ptr(gzA));                   // Implicit EDX:EAX <- EAX * m32
  e->mul(edx, eax, ecx);                    // Explicit EDX:EAX <- EAX * r32
  e->mul(edx, eax, anyptr_gpA);             // Explicit EDX:EAX <- EAX * m32

  if (isX64) e->mul(rcx);                   // Implicit RDX|RAX <- RAX * r64
  if (isX64) e->mul(qword_ptr(gzA));        // Implicit RDX|RAX <- RAX * m64
  if (isX64) e->mul(rdx, rax, rcx);         // Explicit RDX|RAX <- RAX * r64
  if (isX64) e->mul(rdx, rax, anyptr_gpA);  // Explicit RDX|RAX <- RAX * m64

  e->imul(gdA);
  e->imul(gzA);
  e->imul(intptr_gpA);
  e->imul(gdA, 1);
  e->imul(gzA, 1);
  e->imul(gdA, gdB);
  e->imul(gzA, gzB);
  e->imul(gdA, gdB, 1);
  e->imul(gzA, gzB, 1);
  e->imul(gdA, anyptr_gpB);
  e->imul(gzA, intptr_gpB);
  e->imul(gdA, anyptr_gpB, 1);
  e->imul(gzA, intptr_gpB, 1);

  // Special case - zero-extend 32-bit immediate instead of sign-extend:
  if (isX64) e->mov(gzA, uint32_t(0xFEEDFEED));
  if (isX64) e->and_(gzA, uint32_t(0xFEEDFEED));

  // Special case - mov with absolute 32-bit address.
  e->mov(al , ptr(0x01020304u));
  e->mov(ax , ptr(0x01020304u));
  e->mov(eax, ptr(0x01020304u));
  e->mov(ptr(0x01020304u), al );
  e->mov(ptr(0x01020304u), ax );
  e->mov(ptr(0x01020304u), eax);

  // Special case - mov with absolute 64-bit address.
  if (isX64) e->mov(al , ptr(0x0102030405060708u));
  if (isX64) e->mov(ax , ptr(0x0102030405060708u));
  if (isX64) e->mov(eax, ptr(0x0102030405060708u));
  if (isX64) e->mov(rax, ptr(0x0102030405060708u));
  if (isX64) e->mov(ptr(0x0102030405060708u), al );
  if (isX64) e->mov(ptr(0x0102030405060708u), ax );
  if (isX64) e->mov(ptr(0x0102030405060708u), eax);
  if (isX64) e->mov(ptr(0x0102030405060708u), rax);

  // Control registers.
  e->nop();

  e->mov(gzA, cr0);
  e->mov(cr0, gzA);
  if (isX64) e->mov(gzA, cr8);
  if (isX64) e->mov(cr8, gzA);

  // Debug registers.
  e->nop();

  e->mov(gzA, dr0);
  e->mov(dr0, gzA);

  // Segment registers.
  e->nop();

  if (!isX64) e->mov(es, ax);
  if (!isX64) e->mov(es, bx);
  if (!isX64) e->mov(ax, es);
  if (!isX64) e->mov(bx, es);

  if (!isX64) e->mov(cs, ax);
  if (!isX64) e->mov(cs, bx);
  if (!isX64) e->mov(ax, cs);
  if (!isX64) e->mov(bx, cs);

  if (!isX64) e->mov(ss, ax);
  if (!isX64) e->mov(ss, bx);
  if (!isX64) e->mov(ax, ss);
  if (!isX64) e->mov(bx, ss);

  if (!isX64) e->mov(ds, ax);
  if (!isX64) e->mov(ds, bx);
  if (!isX64) e->mov(ax, ds);
  if (!isX64) e->mov(bx, ds);

  e->mov(fs, ax);
  e->mov(fs, bx);
  e->mov(ax, fs);
  e->mov(bx, fs);

  e->mov(gs, ax);
  e->mov(gs, bx);
  e->mov(ax, gs);
  e->mov(bx, gs);

  // Instructions using REP prefix.
  e->nop();

  e->in(al, 0);
  e->in(al, dx);
  e->in(ax, 0);
  e->in(ax, dx);
  e->in(eax, 0);
  e->in(eax, dx);
  e->rep().ins(byte_ptr(e->zdi()), dx);
  e->rep().ins(word_ptr(e->zdi()), dx);
  e->rep().ins(dword_ptr(e->zdi()), dx);

  e->out(imm(0), al);
  e->out(dx, al);
  e->out(imm(0), ax);
  e->out(dx, ax);
  e->out(imm(0), eax);
  e->out(dx, eax);
  e->rep().outs(dx, byte_ptr(e->zsi()));
  e->rep().outs(dx, word_ptr(e->zsi()));
  e->rep().outs(dx, dword_ptr(e->zsi()));

  e->lodsb();
  e->lodsd();
  e->lodsw();
  e->rep().lodsb();
  e->rep().lodsd();
  e->rep().lodsw();
  if (isX64) e->rep().lodsq();

  e->movsb();
  e->movsd();
  e->movsw();
  e->rep().movsb();
  e->rep().movsd();
  e->rep().movsw();
  if (isX64) e->rep().movsq();

  e->stosb();
  e->stosd();
  e->stosw();
  e->rep().stosb();
  e->rep().stosd();
  e->rep().stosw();
  if (isX64) e->rep().stosq();

  e->cmpsb();
  e->cmpsd();
  e->cmpsw();
  e->repz().cmpsb();
  e->repz().cmpsd();
  e->repz().cmpsw();
  if (isX64) e->repz().cmpsq();
  e->repnz().cmpsb();
  e->repnz().cmpsd();
  e->repnz().cmpsw();
  if (isX64) e->repnz().cmpsq();

  e->scasb();
  e->scasd();
  e->scasw();
  e->repz().scasb();
  e->repz().scasd();
  e->repz().scasw();
  if (isX64) e->repz().scasq();
  e->repnz().scasb();
  e->repnz().scasd();
  e->repnz().scasw();
  if (isX64) e->repnz().scasq();

  // Label...Jcc/Jecxz/Jmp.
  e->nop();

  L = e->newLabel();
  e->bind(L);
  e->ja(L);
  e->jae(L);
  e->jb(L);
  e->jbe(L);
  e->jc(L);
  e->je(L);
  e->jg(L);
  e->jge(L);
  e->jl(L);
  e->jle(L);
  e->jna(L);
  e->jnae(L);
  e->jnb(L);
  e->jnbe(L);
  e->jnc(L);
  e->jne(L);
  e->jng(L);
  e->jnge(L);
  e->jnl(L);
  e->jnle(L);
  e->jno(L);
  e->jnp(L);
  e->jns(L);
  e->jnz(L);
  e->jo(L);
  e->jp(L);
  e->jpe(L);
  e->jpo(L);
  e->js(L);
  e->jz(L);
  e->jecxz(ecx, L);
  e->jmp(L);

  // Jcc/Jecxz/Jmp...Label.
  e->nop();

  L = e->newLabel();
  e->ja(L);
  e->jae(L);
  e->jb(L);
  e->jbe(L);
  e->jc(L);
  e->je(L);
  e->jg(L);
  e->jge(L);
  e->jl(L);
  e->jle(L);
  e->jna(L);
  e->jnae(L);
  e->jnb(L);
  e->jnbe(L);
  e->jnc(L);
  e->jne(L);
  e->jng(L);
  e->jnge(L);
  e->jnl(L);
  e->jnle(L);
  e->jno(L);
  e->jnp(L);
  e->jns(L);
  e->jnz(L);
  e->jo(L);
  e->jp(L);
  e->jpe(L);
  e->jpo(L);
  e->js(L);
  e->jz(L);
  e->jecxz(ecx, L);
  e->jmp(L);
  e->bind(L);

  // FPU.
  e->nop();

  e->f2xm1();
  e->fabs();
  e->fadd(stA, stB);
  e->fadd(stB, stA);
  e->fadd(dword_ptr(gzA));
  e->fadd(qword_ptr(gzA));
  e->faddp(stB);
  e->faddp();
  e->fbld(dword_ptr(gzA));
  e->fbstp(dword_ptr(gzA));
  e->fchs();
  e->fclex();
  e->fcom(stB);
  e->fcom();
  e->fcom(dword_ptr(gzA));
  e->fcom(qword_ptr(gzA));
  e->fcomp(stB);
  e->fcomp();
  e->fcomp(dword_ptr(gzA));
  e->fcomp(qword_ptr(gzA));
  e->fcompp();
  e->fcos();
  e->fdecstp();
  e->fdiv(stA, stB);
  e->fdiv(stB, stA);
  e->fdiv(dword_ptr(gzA));
  e->fdiv(qword_ptr(gzA));
  e->fdivp(stB);
  e->fdivp();
  e->fdivr(stA, stB);
  e->fdivr(stB, stA);
  e->fdivr(dword_ptr(gzA));
  e->fdivr(qword_ptr(gzA));
  e->fdivrp(stB);
  e->fdivrp();
  e->fiadd(dword_ptr(gzA));
  e->ficom(word_ptr(gzA));
  e->ficom(dword_ptr(gzA));
  e->ficomp(word_ptr(gzA));
  e->ficomp(dword_ptr(gzA));
  e->fidiv(word_ptr(gzA));
  e->fidiv(dword_ptr(gzA));
  e->fidivr(word_ptr(gzA));
  e->fidivr(dword_ptr(gzA));
  e->fild(word_ptr(gzA));
  e->fild(dword_ptr(gzA));
  e->fild(qword_ptr(gzA));
  e->fimul(word_ptr(gzA));
  e->fimul(dword_ptr(gzA));
  e->fincstp();
  e->finit();
  e->fninit();
  e->fisub(word_ptr(gzA));
  e->fisub(dword_ptr(gzA));
  e->fisubr(word_ptr(gzA));
  e->fisubr(dword_ptr(gzA));
  e->fist(word_ptr(gzA));
  e->fist(dword_ptr(gzA));
  e->fistp(word_ptr(gzA));
  e->fistp(dword_ptr(gzA));
  e->fistp(qword_ptr(gzA));
  e->fld(dword_ptr(gzA));
  e->fld(qword_ptr(gzA));
  e->fld(tword_ptr(gzA));
  e->fld1();
  e->fldl2t();
  e->fldl2e();
  e->fldpi();
  e->fldlg2();
  e->fldln2();
  e->fldz();
  e->fldcw(anyptr_gpA);
  e->fldenv(anyptr_gpA);
  e->fmul(stA, stB);
  e->fmul(stB, stA);
  e->fmul(dword_ptr(gzA));
  e->fmul(qword_ptr(gzA));
  e->fmulp(stB);
  e->fmulp();
  e->fnclex();
  e->fnop();
  e->fnsave(anyptr_gpA);
  e->fnstenv(anyptr_gpA);
  e->fnstcw(anyptr_gpA);
  e->fpatan();
  e->fprem();
  e->fprem1();
  e->fptan();
  e->frndint();
  e->frstor(anyptr_gpA);
  e->fsave(anyptr_gpA);
  e->fscale();
  e->fsin();
  e->fsincos();
  e->fsqrt();
  e->fst(dword_ptr(gzA));
  e->fst(qword_ptr(gzA));
  e->fstp(dword_ptr(gzA));
  e->fstp(qword_ptr(gzA));
  e->fstp(tword_ptr(gzA));
  e->fstcw(anyptr_gpA);
  e->fstenv(anyptr_gpA);
  e->fsub(stA, stB);
  e->fsub(stB, stA);
  e->fsub(dword_ptr(gzA));
  e->fsub(qword_ptr(gzA));
  e->fsubp(stB);
  e->fsubp();
  e->fsubr(stA, stB);
  e->fsubr(stB, stA);
  e->fsubr(dword_ptr(gzA));
  e->fsubr(qword_ptr(gzA));
  e->fsubrp(stB);
  e->fsubrp();
  e->ftst();
  e->fucom(stB);
  e->fucom();
  e->fucom(stB);
  e->fucomi(stB);
  e->fucomip(stB);
  e->fucomp(stB);
  e->fucompp();
  e->fxam();
  e->fxtract();
  e->fyl2x();
  e->fyl2xp1();

  // LAHF/SAHF
  e->lahf();                                // Implicit <AH>
  e->lahf(ah);                              // Explicit <AH>
  e->sahf();                                // Implicit <AH>
  e->sahf(ah);                              // Explicit <AH>

  // FXSR.
  e->fxrstor(anyptr_gpA);
  e->fxsave(anyptr_gpA);

  // XSAVE.
  e->nop();

  e->xgetbv();                              // Implicit <EDX:EAX>, <ECX>
  e->xgetbv(edx, eax, ecx);                 // Explicit <EDX:EAX>, <ECX>

  e->xsetbv();                              // Implicit <EDX:EAX>, <ECX>
  e->xsetbv(edx, eax, ecx);                 // Explicit <EDX:EAX>, <ECX>

  e->xrstor(anyptr_gpA);                    // Implicit <EDX:EAX>
  e->xrstors(anyptr_gpA);                   // Implicit <EDX:EAX>
  e->xsave(anyptr_gpA);                     // Implicit <EDX:EAX>
  e->xsavec(anyptr_gpA);                    // Implicit <EDX:EAX>
  e->xsaveopt(anyptr_gpA);                  // Implicit <EDX:EAX>
  e->xsaves(anyptr_gpA);                    // Implicit <EDX:EAX>

  if (isX64) e->xrstor64(anyptr_gpA);       // Implicit <EDX:EAX>
  if (isX64) e->xrstors64(anyptr_gpA);      // Implicit <EDX:EAX>
  if (isX64) e->xsave64(anyptr_gpA);        // Implicit <EDX:EAX>
  if (isX64) e->xsavec64(anyptr_gpA);       // Implicit <EDX:EAX>
  if (isX64) e->xsaveopt64(anyptr_gpA);     // Implicit <EDX:EAX>
  if (isX64) e->xsaves64(anyptr_gpA);       // Implicit <EDX:EAX>

  // POPCNT.
  e->nop();

  e->popcnt(gdA, gdB);
  e->popcnt(gzA, gzB);
  e->popcnt(gdA, anyptr_gpB);
  e->popcnt(gzA, anyptr_gpB);

  // LZCNT.
  e->nop();

  e->lzcnt(gdA, gdB);
  e->lzcnt(gzA, gzB);
  e->lzcnt(gdA, anyptr_gpB);
  e->lzcnt(gzA, anyptr_gpB);

  // BMI.
  e->nop();

  e->andn(gdA, gdB, gdC);
  e->andn(gzA, gzB, gzC);
  e->andn(gdA, gdB, anyptr_gpC);
  e->andn(gzA, gzB, anyptr_gpC);
  e->bextr(gdA, gdB, gdC);
  e->bextr(gzA, gzB, gzC);
  e->bextr(gdA, anyptr_gpB, gdC);
  e->bextr(gzA, anyptr_gpB, gzC);
  e->blsi(gdA, gdB);
  e->blsi(gzA, gzB);
  e->blsi(gdA, anyptr_gpB);
  e->blsi(gzA, anyptr_gpB);
  e->blsmsk(gdA, gdB);
  e->blsmsk(gzA, gzB);
  e->blsmsk(gdA, anyptr_gpB);
  e->blsmsk(gzA, anyptr_gpB);
  e->blsr(gdA, gdB);
  e->blsr(gzA, gzB);
  e->blsr(gdA, anyptr_gpB);
  e->blsr(gzA, anyptr_gpB);
  e->tzcnt(gdA, gdB);
  e->tzcnt(gzA, gzB);
  e->tzcnt(gdA, anyptr_gpB);
  e->tzcnt(gzA, anyptr_gpB);

  // BMI2.
  e->nop();

  e->bzhi(gdA, gdB, gdC);
  e->bzhi(gzA, gzB, gzC);
  e->bzhi(gdA, anyptr_gpB, gdC);
  e->bzhi(gzA, anyptr_gpB, gzC);
  e->mulx(gdA, gdB, gdC);                   // Implicit gpA, gpB, gpC, <EDX>
  e->mulx(gdA, gdB, gdC, edx);              // Explicit gpA, gpB, gpC, <EDX>
  e->mulx(gzA, gzB, gzC);                   // Implicit gpA, gpB, gpC, <EDX|RDX>
  e->mulx(gzA, gzB, gzC, e->zdx());         // Explicit gpA, gpB, gpC, <EDX|RDX>
  e->mulx(gdA, gdB, anyptr_gpC);            // Implicit gpA, gpB, mem, <EDX>
  e->mulx(gdA, gdB, anyptr_gpC, edx);       // Explicit gpA, gpB, mem, <EDX>
  e->mulx(gzA, gzB, anyptr_gpC);            // Implicit gpA, gpB, mem, <EDX|RDX>
  e->mulx(gzA, gzB, anyptr_gpC, e->zdx());  // Explicit gpA, gpB, mem, <EDX|RDX>
  e->pdep(gdA, gdB, gdC);
  e->pdep(gzA, gzB, gzC);
  e->pdep(gdA, gdB, anyptr_gpC);
  e->pdep(gzA, gzB, anyptr_gpC);
  e->pext(gdA, gdB, gdC);
  e->pext(gzA, gzB, gzC);
  e->pext(gdA, gdB, anyptr_gpC);
  e->pext(gzA, gzB, anyptr_gpC);
  e->rorx(gdA, gdB, 0);
  e->rorx(gzA, gzB, 0);
  e->rorx(gdA, anyptr_gpB, 0);
  e->rorx(gzA, anyptr_gpB, 0);
  e->sarx(gdA, gdB, gdC);
  e->sarx(gzA, gzB, gzC);
  e->sarx(gdA, anyptr_gpB, gdC);
  e->sarx(gzA, anyptr_gpB, gzC);
  e->shlx(gdA, gdB, gdC);
  e->shlx(gzA, gzB, gzC);
  e->shlx(gdA, anyptr_gpB, gdC);
  e->shlx(gzA, anyptr_gpB, gzC);
  e->shrx(gdA, gdB, gdC);
  e->shrx(gzA, gzB, gzC);
  e->shrx(gdA, anyptr_gpB, gdC);
  e->shrx(gzA, anyptr_gpB, gzC);

  // ADX.
  e->nop();

  e->adcx(gdA, gdB);
  e->adcx(gzA, gzB);
  e->adcx(gdA, anyptr_gpB);
  e->adcx(gzA, anyptr_gpB);
  e->adox(gdA, gdB);
  e->adox(gzA, gzB);
  e->adox(gdA, anyptr_gpB);
  e->adox(gzA, anyptr_gpB);

  // TBM.
  e->nop();

  e->blcfill(gdA, gdB);
  e->blcfill(gzA, gzB);
  e->blcfill(gdA, anyptr_gpB);
  e->blcfill(gzA, anyptr_gpB);

  e->blci(gdA, gdB);
  e->blci(gzA, gzB);
  e->blci(gdA, anyptr_gpB);
  e->blci(gzA, anyptr_gpB);

  e->blcic(gdA, gdB);
  e->blcic(gzA, gzB);
  e->blcic(gdA, anyptr_gpB);
  e->blcic(gzA, anyptr_gpB);

  e->blcmsk(gdA, gdB);
  e->blcmsk(gzA, gzB);
  e->blcmsk(gdA, anyptr_gpB);
  e->blcmsk(gzA, anyptr_gpB);

  e->blcs(gdA, gdB);
  e->blcs(gzA, gzB);
  e->blcs(gdA, anyptr_gpB);
  e->blcs(gzA, anyptr_gpB);

  e->blsfill(gdA, gdB);
  e->blsfill(gzA, gzB);
  e->blsfill(gdA, anyptr_gpB);
  e->blsfill(gzA, anyptr_gpB);

  e->blsic(gdA, gdB);
  e->blsic(gzA, gzB);
  e->blsic(gdA, anyptr_gpB);
  e->blsic(gzA, anyptr_gpB);

  e->t1mskc(gdA, gdB);
  e->t1mskc(gzA, gzB);
  e->t1mskc(gdA, anyptr_gpB);
  e->t1mskc(gzA, anyptr_gpB);

  e->tzmsk(gdA, gdB);
  e->tzmsk(gzA, gzB);
  e->tzmsk(gdA, anyptr_gpB);
  e->tzmsk(gzA, anyptr_gpB);

  // CLFLUSH / CLFLUSH_OPT.
  e->nop();
  e->clflush(anyptr_gpA);
  e->clflushopt(anyptr_gpA);

  // CLWB.
  e->nop();
  e->clwb(anyptr_gpA);

  // CLZERO.
  e->nop();
  e->clzero();                              // Implicit <ds:[EAX|RAX]>
  e->clzero(ptr(e->zax()));                 // Explicit <ds:[EAX|RAX]>

  // MONITOR[X] / MWAIT[X].
  e->nop();
  e->monitor();                             // Implicit <ds:[EAX|RAX]>, <ECX>, <EDX>
  e->monitorx();                            // Implicit <ds:[EAX|RAX]>, <ECX>, <EDX>
  e->mwait();                               // Implicit <EAX>, <ECX>
  e->mwaitx();                              // Implicit <EAX>, <ECX>, <EBX>

  // PREFETCH / PREFETCHW / PREFETCHWT1.
  e->nop();
  e->prefetch(anyptr_gpA);                  // 3DNOW.
  e->prefetchnta(anyptr_gpA);               // MMX+SSE.
  e->prefetcht0(anyptr_gpA);                // MMX+SSE.
  e->prefetcht1(anyptr_gpA);                // MMX+SSE.
  e->prefetcht2(anyptr_gpA);                // MMX+SSE.
  e->prefetchw(anyptr_gpA);                 // PREFETCHW.
  e->prefetchwt1(anyptr_gpA);               // PREFETCHWT1.

  // RDRAND / RDSEED.
  e->nop();

  e->rdrand(gdA);
  e->rdrand(gzA);
  e->rdseed(gdA);
  e->rdseed(gzA);

  // MMX/MMX2.
  e->nop();

  e->movd(anyptr_gpA, mmB);
  e->movd(gdA, mmB);
  e->movd(mmA, anyptr_gpB);
  e->movd(mmA, gdB);
  e->movq(mmA, mmB);
  e->movq(anyptr_gpA, mmB);
  e->movq(mmA, anyptr_gpB);
  e->packuswb(mmA, mmB);
  e->packuswb(mmA, anyptr_gpB);
  e->paddb(mmA, mmB);
  e->paddb(mmA, anyptr_gpB);
  e->paddw(mmA, mmB);
  e->paddw(mmA, anyptr_gpB);
  e->paddd(mmA, mmB);
  e->paddd(mmA, anyptr_gpB);
  e->paddsb(mmA, mmB);
  e->paddsb(mmA, anyptr_gpB);
  e->paddsw(mmA, mmB);
  e->paddsw(mmA, anyptr_gpB);
  e->paddusb(mmA, mmB);
  e->paddusb(mmA, anyptr_gpB);
  e->paddusw(mmA, mmB);
  e->paddusw(mmA, anyptr_gpB);
  e->pand(mmA, mmB);
  e->pand(mmA, anyptr_gpB);
  e->pandn(mmA, mmB);
  e->pandn(mmA, anyptr_gpB);
  e->pcmpeqb(mmA, mmB);
  e->pcmpeqb(mmA, anyptr_gpB);
  e->pcmpeqw(mmA, mmB);
  e->pcmpeqw(mmA, anyptr_gpB);
  e->pcmpeqd(mmA, mmB);
  e->pcmpeqd(mmA, anyptr_gpB);
  e->pcmpgtb(mmA, mmB);
  e->pcmpgtb(mmA, anyptr_gpB);
  e->pcmpgtw(mmA, mmB);
  e->pcmpgtw(mmA, anyptr_gpB);
  e->pcmpgtd(mmA, mmB);
  e->pcmpgtd(mmA, anyptr_gpB);
  e->pmulhw(mmA, mmB);
  e->pmulhw(mmA, anyptr_gpB);
  e->pmullw(mmA, mmB);
  e->pmullw(mmA, anyptr_gpB);
  e->por(mmA, mmB);
  e->por(mmA, anyptr_gpB);
  e->pmaddwd(mmA, mmB);
  e->pmaddwd(mmA, anyptr_gpB);
  e->pslld(mmA, mmB);
  e->pslld(mmA, anyptr_gpB);
  e->pslld(mmA, 0);
  e->psllq(mmA, mmB);
  e->psllq(mmA, anyptr_gpB);
  e->psllq(mmA, 0);
  e->psllw(mmA, mmB);
  e->psllw(mmA, anyptr_gpB);
  e->psllw(mmA, 0);
  e->psrad(mmA, mmB);
  e->psrad(mmA, anyptr_gpB);
  e->psrad(mmA, 0);
  e->psraw(mmA, mmB);
  e->psraw(mmA, anyptr_gpB);
  e->psraw(mmA, 0);
  e->psrld(mmA, mmB);
  e->psrld(mmA, anyptr_gpB);
  e->psrld(mmA, 0);
  e->psrlq(mmA, mmB);
  e->psrlq(mmA, anyptr_gpB);
  e->psrlq(mmA, 0);
  e->psrlw(mmA, mmB);
  e->psrlw(mmA, anyptr_gpB);
  e->psrlw(mmA, 0);
  e->psubb(mmA, mmB);
  e->psubb(mmA, anyptr_gpB);
  e->psubw(mmA, mmB);
  e->psubw(mmA, anyptr_gpB);
  e->psubd(mmA, mmB);
  e->psubd(mmA, anyptr_gpB);
  e->psubsb(mmA, mmB);
  e->psubsb(mmA, anyptr_gpB);
  e->psubsw(mmA, mmB);
  e->psubsw(mmA, anyptr_gpB);
  e->psubusb(mmA, mmB);
  e->psubusb(mmA, anyptr_gpB);
  e->psubusw(mmA, mmB);
  e->psubusw(mmA, anyptr_gpB);
  e->punpckhbw(mmA, mmB);
  e->punpckhbw(mmA, anyptr_gpB);
  e->punpckhwd(mmA, mmB);
  e->punpckhwd(mmA, anyptr_gpB);
  e->punpckhdq(mmA, mmB);
  e->punpckhdq(mmA, anyptr_gpB);
  e->punpcklbw(mmA, mmB);
  e->punpcklbw(mmA, anyptr_gpB);
  e->punpcklwd(mmA, mmB);
  e->punpcklwd(mmA, anyptr_gpB);
  e->punpckldq(mmA, mmB);
  e->punpckldq(mmA, anyptr_gpB);
  e->pxor(mmA, mmB);
  e->pxor(mmA, anyptr_gpB);
  e->emms();

  // 3DNOW.
  e->nop();

  e->pavgusb(mmA, mmB);
  e->pavgusb(mmA, anyptr_gpB);
  e->pf2id(mmA, mmB);
  e->pf2id(mmA, anyptr_gpB);
  e->pf2iw(mmA, mmB);
  e->pf2iw(mmA, anyptr_gpB);
  e->pfacc(mmA, mmB);
  e->pfacc(mmA, anyptr_gpB);
  e->pfadd(mmA, mmB);
  e->pfadd(mmA, anyptr_gpB);
  e->pfcmpeq(mmA, mmB);
  e->pfcmpeq(mmA, anyptr_gpB);
  e->pfcmpge(mmA, mmB);
  e->pfcmpge(mmA, anyptr_gpB);
  e->pfcmpgt(mmA, mmB);
  e->pfcmpgt(mmA, anyptr_gpB);
  e->pfmax(mmA, mmB);
  e->pfmax(mmA, anyptr_gpB);
  e->pfmin(mmA, mmB);
  e->pfmin(mmA, anyptr_gpB);
  e->pfmul(mmA, mmB);
  e->pfmul(mmA, anyptr_gpB);
  e->pfnacc(mmA, mmB);
  e->pfnacc(mmA, anyptr_gpB);
  e->pfpnacc(mmA, mmB);
  e->pfpnacc(mmA, anyptr_gpB);
  e->pfrcp(mmA, mmB);
  e->pfrcp(mmA, anyptr_gpB);
  e->pfrcpit1(mmA, mmB);
  e->pfrcpit1(mmA, anyptr_gpB);
  e->pfrcpit2(mmA, mmB);
  e->pfrcpit2(mmA, anyptr_gpB);
  e->pfrcpv(mmA, mmB);
  e->pfrcpv(mmA, anyptr_gpB);
  e->pfrsqit1(mmA, mmB);
  e->pfrsqit1(mmA, anyptr_gpB);
  e->pfrsqrt(mmA, mmB);
  e->pfrsqrt(mmA, anyptr_gpB);
  e->pfrsqrtv(mmA, mmB);
  e->pfrsqrtv(mmA, anyptr_gpB);
  e->pfsub(mmA, mmB);
  e->pfsub(mmA, anyptr_gpB);
  e->pfsubr(mmA, mmB);
  e->pfsubr(mmA, anyptr_gpB);
  e->pi2fd(mmA, mmB);
  e->pi2fd(mmA, anyptr_gpB);
  e->pi2fw(mmA, mmB);
  e->pi2fw(mmA, anyptr_gpB);
  e->pmulhrw(mmA, mmB);
  e->pmulhrw(mmA, anyptr_gpB);
  e->pswapd(mmA, mmB);
  e->pswapd(mmA, anyptr_gpB);
  e->femms();

  // SSE.
  e->nop();

  e->addps(xmmA, xmmB);
  e->addps(xmmA, anyptr_gpB);
  e->addss(xmmA, xmmB);
  e->addss(xmmA, anyptr_gpB);
  e->andnps(xmmA, xmmB);
  e->andnps(xmmA, anyptr_gpB);
  e->andps(xmmA, xmmB);
  e->andps(xmmA, anyptr_gpB);
  e->cmpps(xmmA, xmmB, 0);
  e->cmpps(xmmA, anyptr_gpB, 0);
  e->cmpss(xmmA, xmmB, 0);
  e->cmpss(xmmA, anyptr_gpB, 0);
  e->comiss(xmmA, xmmB);
  e->comiss(xmmA, anyptr_gpB);
  e->cvtpi2ps(xmmA, mmB);
  e->cvtpi2ps(xmmA, anyptr_gpB);
  e->cvtps2pi(mmA, xmmB);
  e->cvtps2pi(mmA, anyptr_gpB);
  e->cvtsi2ss(xmmA, gdB);
  e->cvtsi2ss(xmmA, gzB);
  e->cvtsi2ss(xmmA, anyptr_gpB);
  e->cvtss2si(gdA, xmmB);
  e->cvtss2si(gzA, xmmB);
  e->cvtss2si(gdA, anyptr_gpB);
  e->cvtss2si(gzA, anyptr_gpB);
  e->cvttps2pi(mmA, xmmB);
  e->cvttps2pi(mmA, anyptr_gpB);
  e->cvttss2si(gdA, xmmB);
  e->cvttss2si(gzA, xmmB);
  e->cvttss2si(gdA, anyptr_gpB);
  e->cvttss2si(gzA, anyptr_gpB);
  e->divps(xmmA, xmmB);
  e->divps(xmmA, anyptr_gpB);
  e->divss(xmmA, xmmB);
  e->divss(xmmA, anyptr_gpB);
  e->ldmxcsr(anyptr_gpA);
  e->maskmovq(mmA, mmB);                    // Implicit mmA, mmB, <ds:[EDI|RDI]>
  e->maskmovq(mmA, mmB, ptr(e->zdi()));     // Explicit mmA, mmB, <ds:[EDI|RDI]>
  e->maxps(xmmA, xmmB);
  e->maxps(xmmA, anyptr_gpB);
  e->maxss(xmmA, xmmB);
  e->maxss(xmmA, anyptr_gpB);
  e->minps(xmmA, xmmB);
  e->minps(xmmA, anyptr_gpB);
  e->minss(xmmA, xmmB);
  e->minss(xmmA, anyptr_gpB);
  e->movaps(xmmA, xmmB);
  e->movaps(xmmA, anyptr_gpB);
  e->movaps(anyptr_gpA, xmmB);
  e->movd(anyptr_gpA, xmmB);
  e->movd(gdA, xmmB);
  e->movd(gzA, xmmB);
  e->movd(xmmA, anyptr_gpB);
  e->movd(xmmA, gdB);
  e->movd(xmmA, gzB);
  e->movq(mmA, mmB);
  e->movq(xmmA, xmmB);
  e->movq(anyptr_gpA, xmmB);
  e->movq(xmmA, anyptr_gpB);
  e->movntq(anyptr_gpA, mmB);
  e->movhlps(xmmA, xmmB);
  e->movhps(xmmA, anyptr_gpB);
  e->movhps(anyptr_gpA, xmmB);
  e->movlhps(xmmA, xmmB);
  e->movlps(xmmA, anyptr_gpB);
  e->movlps(anyptr_gpA, xmmB);
  e->movntps(anyptr_gpA, xmmB);
  e->movss(xmmA, anyptr_gpB);
  e->movss(anyptr_gpA, xmmB);
  e->movups(xmmA, xmmB);
  e->movups(xmmA, anyptr_gpB);
  e->movups(anyptr_gpA, xmmB);
  e->mulps(xmmA, xmmB);
  e->mulps(xmmA, anyptr_gpB);
  e->mulss(xmmA, xmmB);
  e->mulss(xmmA, anyptr_gpB);
  e->orps(xmmA, xmmB);
  e->orps(xmmA, anyptr_gpB);
  e->pavgb(mmA, mmB);
  e->pavgb(mmA, anyptr_gpB);
  e->pavgw(mmA, mmB);
  e->pavgw(mmA, anyptr_gpB);
  e->pextrw(gdA, mmB, 0);
  e->pextrw(gzA, mmB, 0);
  e->pinsrw(mmA, gdB, 0);
  e->pinsrw(mmA, gzB, 0);
  e->pinsrw(mmA, anyptr_gpB, 0);
  e->pmaxsw(mmA, mmB);
  e->pmaxsw(mmA, anyptr_gpB);
  e->pmaxub(mmA, mmB);
  e->pmaxub(mmA, anyptr_gpB);
  e->pminsw(mmA, mmB);
  e->pminsw(mmA, anyptr_gpB);
  e->pminub(mmA, mmB);
  e->pminub(mmA, anyptr_gpB);
  e->pmovmskb(gdA, mmB);
  e->pmovmskb(gzA, mmB);
  e->pmulhuw(mmA, mmB);
  e->pmulhuw(mmA, anyptr_gpB);
  e->psadbw(mmA, mmB);
  e->psadbw(mmA, anyptr_gpB);
  e->pshufw(mmA, mmB, 0);
  e->pshufw(mmA, anyptr_gpB, 0);
  e->rcpps(xmmA, xmmB);
  e->rcpps(xmmA, anyptr_gpB);
  e->rcpss(xmmA, xmmB);
  e->rcpss(xmmA, anyptr_gpB);
  e->psadbw(xmmA, xmmB);
  e->psadbw(xmmA, anyptr_gpB);
  e->rsqrtps(xmmA, xmmB);
  e->rsqrtps(xmmA, anyptr_gpB);
  e->rsqrtss(xmmA, xmmB);
  e->rsqrtss(xmmA, anyptr_gpB);
  e->sfence();
  e->shufps(xmmA, xmmB, 0);
  e->shufps(xmmA, anyptr_gpB, 0);
  e->sqrtps(xmmA, xmmB);
  e->sqrtps(xmmA, anyptr_gpB);
  e->sqrtss(xmmA, xmmB);
  e->sqrtss(xmmA, anyptr_gpB);
  e->stmxcsr(anyptr_gpA);
  e->subps(xmmA, xmmB);
  e->subps(xmmA, anyptr_gpB);
  e->subss(xmmA, xmmB);
  e->subss(xmmA, anyptr_gpB);
  e->ucomiss(xmmA, xmmB);
  e->ucomiss(xmmA, anyptr_gpB);
  e->unpckhps(xmmA, xmmB);
  e->unpckhps(xmmA, anyptr_gpB);
  e->unpcklps(xmmA, xmmB);
  e->unpcklps(xmmA, anyptr_gpB);
  e->xorps(xmmA, xmmB);
  e->xorps(xmmA, anyptr_gpB);

  // SSE2.
  e->nop();

  e->addpd(xmmA, xmmB);
  e->addpd(xmmA, anyptr_gpB);
  e->addsd(xmmA, xmmB);
  e->addsd(xmmA, anyptr_gpB);
  e->andnpd(xmmA, xmmB);
  e->andnpd(xmmA, anyptr_gpB);
  e->andpd(xmmA, xmmB);
  e->andpd(xmmA, anyptr_gpB);
  e->cmppd(xmmA, xmmB, 0);
  e->cmppd(xmmA, anyptr_gpB, 0);
  e->cmpsd(xmmA, xmmB, 0);
  e->cmpsd(xmmA, anyptr_gpB, 0);
  e->comisd(xmmA, xmmB);
  e->comisd(xmmA, anyptr_gpB);
  e->cvtdq2pd(xmmA, xmmB);
  e->cvtdq2pd(xmmA, anyptr_gpB);
  e->cvtdq2ps(xmmA, xmmB);
  e->cvtdq2ps(xmmA, anyptr_gpB);
  e->cvtpd2dq(xmmA, xmmB);
  e->cvtpd2dq(xmmA, anyptr_gpB);
  e->cvtpd2pi(mmA, xmmB);
  e->cvtpd2pi(mmA, anyptr_gpB);
  e->cvtpd2ps(xmmA, xmmB);
  e->cvtpd2ps(xmmA, anyptr_gpB);
  e->cvtpi2pd(xmmA, mmB);
  e->cvtpi2pd(xmmA, anyptr_gpB);
  e->cvtps2dq(xmmA, xmmB);
  e->cvtps2dq(xmmA, anyptr_gpB);
  e->cvtps2pd(xmmA, xmmB);
  e->cvtps2pd(xmmA, anyptr_gpB);
  e->cvtsd2si(gdA, xmmB);
  e->cvtsd2si(gzA, xmmB);
  e->cvtsd2si(gdA, anyptr_gpB);
  e->cvtsd2si(gzA, anyptr_gpB);
  e->cvtsd2ss(xmmA, xmmB);
  e->cvtsd2ss(xmmA, anyptr_gpB);
  e->cvtsi2sd(xmmA, gdB);
  e->cvtsi2sd(xmmA, gzB);
  e->cvtsi2sd(xmmA, anyptr_gpB);
  e->cvtss2sd(xmmA, xmmB);
  e->cvtss2sd(xmmA, anyptr_gpB);
  e->cvtss2si(gdA, xmmB);
  e->cvtss2si(gzA, xmmB);
  e->cvtss2si(gdA, anyptr_gpB);
  e->cvtss2si(gzA, anyptr_gpB);
  e->cvttpd2pi(mmA, xmmB);
  e->cvttpd2pi(mmA, anyptr_gpB);
  e->cvttpd2dq(xmmA, xmmB);
  e->cvttpd2dq(xmmA, anyptr_gpB);
  e->cvttps2dq(xmmA, xmmB);
  e->cvttps2dq(xmmA, anyptr_gpB);
  e->cvttsd2si(gdA, xmmB);
  e->cvttsd2si(gzA, xmmB);
  e->cvttsd2si(gdA, anyptr_gpB);
  e->cvttsd2si(gzA, anyptr_gpB);
  e->divpd(xmmA, xmmB);
  e->divpd(xmmA, anyptr_gpB);
  e->divsd(xmmA, xmmB);
  e->divsd(xmmA, anyptr_gpB);
  e->lfence();
  e->maskmovdqu(xmmA, xmmB);                // Implicit xmmA, xmmB, <ds:[EDI|RDI]>
  e->maskmovdqu(xmmA, xmmB, ptr(e->zdi())); // Explicit xmmA, xmmB, <ds:[EDI|RDI]>
  e->maxpd(xmmA, xmmB);
  e->maxpd(xmmA, anyptr_gpB);
  e->maxsd(xmmA, xmmB);
  e->maxsd(xmmA, anyptr_gpB);
  e->mfence();
  e->minpd(xmmA, xmmB);
  e->minpd(xmmA, anyptr_gpB);
  e->minsd(xmmA, xmmB);
  e->minsd(xmmA, anyptr_gpB);
  e->movdqa(xmmA, xmmB);
  e->movdqa(xmmA, anyptr_gpB);
  e->movdqa(anyptr_gpA, xmmB);
  e->movdqu(xmmA, xmmB);
  e->movdqu(xmmA, anyptr_gpB);
  e->movdqu(anyptr_gpA, xmmB);
  e->movmskps(gdA, xmmB);
  e->movmskps(gzA, xmmB);
  e->movmskpd(gdA, xmmB);
  e->movmskpd(gzA, xmmB);
  e->movsd(xmmA, xmmB);
  e->movsd(xmmA, anyptr_gpB);
  e->movsd(anyptr_gpA, xmmB);
  e->movapd(xmmA, anyptr_gpB);
  e->movapd(anyptr_gpA, xmmB);
  e->movdq2q(mmA, xmmB);
  e->movq2dq(xmmA, mmB);
  e->movhpd(xmmA, anyptr_gpB);
  e->movhpd(anyptr_gpA, xmmB);
  e->movlpd(xmmA, anyptr_gpB);
  e->movlpd(anyptr_gpA, xmmB);
  e->movntdq(anyptr_gpA, xmmB);
  e->movnti(anyptr_gpA, gdB);
  e->movnti(anyptr_gpA, gzB);
  e->movntpd(anyptr_gpA, xmmB);
  e->movupd(xmmA, anyptr_gpB);
  e->movupd(anyptr_gpA, xmmB);
  e->mulpd(xmmA, xmmB);
  e->mulpd(xmmA, anyptr_gpB);
  e->mulsd(xmmA, xmmB);
  e->mulsd(xmmA, anyptr_gpB);
  e->orpd(xmmA, xmmB);
  e->orpd(xmmA, anyptr_gpB);
  e->packsswb(xmmA, xmmB);
  e->packsswb(xmmA, anyptr_gpB);
  e->packssdw(xmmA, xmmB);
  e->packssdw(xmmA, anyptr_gpB);
  e->packuswb(xmmA, xmmB);
  e->packuswb(xmmA, anyptr_gpB);
  e->paddb(xmmA, xmmB);
  e->paddb(xmmA, anyptr_gpB);
  e->paddw(xmmA, xmmB);
  e->paddw(xmmA, anyptr_gpB);
  e->paddd(xmmA, xmmB);
  e->paddd(xmmA, anyptr_gpB);
  e->paddq(mmA, mmB);
  e->paddq(mmA, anyptr_gpB);
  e->paddq(xmmA, xmmB);
  e->paddq(xmmA, anyptr_gpB);
  e->paddsb(xmmA, xmmB);
  e->paddsb(xmmA, anyptr_gpB);
  e->paddsw(xmmA, xmmB);
  e->paddsw(xmmA, anyptr_gpB);
  e->paddusb(xmmA, xmmB);
  e->paddusb(xmmA, anyptr_gpB);
  e->paddusw(xmmA, xmmB);
  e->paddusw(xmmA, anyptr_gpB);
  e->pand(xmmA, xmmB);
  e->pand(xmmA, anyptr_gpB);
  e->pandn(xmmA, xmmB);
  e->pandn(xmmA, anyptr_gpB);
  e->pause();
  e->pavgb(xmmA, xmmB);
  e->pavgb(xmmA, anyptr_gpB);
  e->pavgw(xmmA, xmmB);
  e->pavgw(xmmA, anyptr_gpB);
  e->pcmpeqb(xmmA, xmmB);
  e->pcmpeqb(xmmA, anyptr_gpB);
  e->pcmpeqw(xmmA, xmmB);
  e->pcmpeqw(xmmA, anyptr_gpB);
  e->pcmpeqd(xmmA, xmmB);
  e->pcmpeqd(xmmA, anyptr_gpB);
  e->pcmpgtb(xmmA, xmmB);
  e->pcmpgtb(xmmA, anyptr_gpB);
  e->pcmpgtw(xmmA, xmmB);
  e->pcmpgtw(xmmA, anyptr_gpB);
  e->pcmpgtd(xmmA, xmmB);
  e->pcmpgtd(xmmA, anyptr_gpB);
  e->pmaxsw(xmmA, xmmB);
  e->pmaxsw(xmmA, anyptr_gpB);
  e->pmaxub(xmmA, xmmB);
  e->pmaxub(xmmA, anyptr_gpB);
  e->pminsw(xmmA, xmmB);
  e->pminsw(xmmA, anyptr_gpB);
  e->pminub(xmmA, xmmB);
  e->pminub(xmmA, anyptr_gpB);
  e->pmovmskb(gdA, xmmB);
  e->pmovmskb(gzA, xmmB);
  e->pmulhw(xmmA, xmmB);
  e->pmulhw(xmmA, anyptr_gpB);
  e->pmulhuw(xmmA, xmmB);
  e->pmulhuw(xmmA, anyptr_gpB);
  e->pmullw(xmmA, xmmB);
  e->pmullw(xmmA, anyptr_gpB);
  e->pmuludq(mmA, mmB);
  e->pmuludq(mmA, anyptr_gpB);
  e->pmuludq(xmmA, xmmB);
  e->pmuludq(xmmA, anyptr_gpB);
  e->por(xmmA, xmmB);
  e->por(xmmA, anyptr_gpB);
  e->pslld(xmmA, xmmB);
  e->pslld(xmmA, anyptr_gpB);
  e->pslld(xmmA, 0);
  e->psllq(xmmA, xmmB);
  e->psllq(xmmA, anyptr_gpB);
  e->psllq(xmmA, 0);
  e->psllw(xmmA, xmmB);
  e->psllw(xmmA, anyptr_gpB);
  e->psllw(xmmA, 0);
  e->pslldq(xmmA, 0);
  e->psrad(xmmA, xmmB);
  e->psrad(xmmA, anyptr_gpB);
  e->psrad(xmmA, 0);
  e->psraw(xmmA, xmmB);
  e->psraw(xmmA, anyptr_gpB);
  e->psraw(xmmA, 0);
  e->psubb(xmmA, xmmB);
  e->psubb(xmmA, anyptr_gpB);
  e->psubw(xmmA, xmmB);
  e->psubw(xmmA, anyptr_gpB);
  e->psubd(xmmA, xmmB);
  e->psubd(xmmA, anyptr_gpB);
  e->psubq(mmA, mmB);
  e->psubq(mmA, anyptr_gpB);
  e->psubq(xmmA, xmmB);
  e->psubq(xmmA, anyptr_gpB);
  e->pmaddwd(xmmA, xmmB);
  e->pmaddwd(xmmA, anyptr_gpB);
  e->pshufd(xmmA, xmmB, 0);
  e->pshufd(xmmA, anyptr_gpB, 0);
  e->pshufhw(xmmA, xmmB, 0);
  e->pshufhw(xmmA, anyptr_gpB, 0);
  e->pshuflw(xmmA, xmmB, 0);
  e->pshuflw(xmmA, anyptr_gpB, 0);
  e->psrld(xmmA, xmmB);
  e->psrld(xmmA, anyptr_gpB);
  e->psrld(xmmA, 0);
  e->psrlq(xmmA, xmmB);
  e->psrlq(xmmA, anyptr_gpB);
  e->psrlq(xmmA, 0);
  e->psrldq(xmmA, 0);
  e->psrlw(xmmA, xmmB);
  e->psrlw(xmmA, anyptr_gpB);
  e->psrlw(xmmA, 0);
  e->psubsb(xmmA, xmmB);
  e->psubsb(xmmA, anyptr_gpB);
  e->psubsw(xmmA, xmmB);
  e->psubsw(xmmA, anyptr_gpB);
  e->psubusb(xmmA, xmmB);
  e->psubusb(xmmA, anyptr_gpB);
  e->psubusw(xmmA, xmmB);
  e->psubusw(xmmA, anyptr_gpB);
  e->punpckhbw(xmmA, xmmB);
  e->punpckhbw(xmmA, anyptr_gpB);
  e->punpckhwd(xmmA, xmmB);
  e->punpckhwd(xmmA, anyptr_gpB);
  e->punpckhdq(xmmA, xmmB);
  e->punpckhdq(xmmA, anyptr_gpB);
  e->punpckhqdq(xmmA, xmmB);
  e->punpckhqdq(xmmA, anyptr_gpB);
  e->punpcklbw(xmmA, xmmB);
  e->punpcklbw(xmmA, anyptr_gpB);
  e->punpcklwd(xmmA, xmmB);
  e->punpcklwd(xmmA, anyptr_gpB);
  e->punpckldq(xmmA, xmmB);
  e->punpckldq(xmmA, anyptr_gpB);
  e->punpcklqdq(xmmA, xmmB);
  e->punpcklqdq(xmmA, anyptr_gpB);
  e->pxor(xmmA, xmmB);
  e->pxor(xmmA, anyptr_gpB);
  e->sqrtpd(xmmA, xmmB);
  e->sqrtpd(xmmA, anyptr_gpB);
  e->sqrtsd(xmmA, xmmB);
  e->sqrtsd(xmmA, anyptr_gpB);
  e->subpd(xmmA, xmmB);
  e->subpd(xmmA, anyptr_gpB);
  e->subsd(xmmA, xmmB);
  e->subsd(xmmA, anyptr_gpB);
  e->ucomisd(xmmA, xmmB);
  e->ucomisd(xmmA, anyptr_gpB);
  e->unpckhpd(xmmA, xmmB);
  e->unpckhpd(xmmA, anyptr_gpB);
  e->unpcklpd(xmmA, xmmB);
  e->unpcklpd(xmmA, anyptr_gpB);
  e->xorpd(xmmA, xmmB);
  e->xorpd(xmmA, anyptr_gpB);

  // SSE3.
  e->nop();

  e->addsubpd(xmmA, xmmB);
  e->addsubpd(xmmA, anyptr_gpB);
  e->addsubps(xmmA, xmmB);
  e->addsubps(xmmA, anyptr_gpB);
  e->fisttp(dword_ptr(gzA));
  e->haddpd(xmmA, xmmB);
  e->haddpd(xmmA, anyptr_gpB);
  e->haddps(xmmA, xmmB);
  e->haddps(xmmA, anyptr_gpB);
  e->hsubpd(xmmA, xmmB);
  e->hsubpd(xmmA, anyptr_gpB);
  e->hsubps(xmmA, xmmB);
  e->hsubps(xmmA, anyptr_gpB);
  e->lddqu(xmmA, anyptr_gpB);
  e->movddup(xmmA, xmmB);
  e->movddup(xmmA, anyptr_gpB);
  e->movshdup(xmmA, xmmB);
  e->movshdup(xmmA, anyptr_gpB);
  e->movsldup(xmmA, xmmB);
  e->movsldup(xmmA, anyptr_gpB);

  // SSSE3.
  e->nop();

  e->psignb(mmA, mmB);
  e->psignb(mmA, anyptr_gpB);
  e->psignb(xmmA, xmmB);
  e->psignb(xmmA, anyptr_gpB);
  e->psignw(mmA, mmB);
  e->psignw(mmA, anyptr_gpB);
  e->psignw(xmmA, xmmB);
  e->psignw(xmmA, anyptr_gpB);
  e->psignd(mmA, mmB);
  e->psignd(mmA, anyptr_gpB);
  e->psignd(xmmA, xmmB);
  e->psignd(xmmA, anyptr_gpB);
  e->phaddw(mmA, mmB);
  e->phaddw(mmA, anyptr_gpB);
  e->phaddw(xmmA, xmmB);
  e->phaddw(xmmA, anyptr_gpB);
  e->phaddd(mmA, mmB);
  e->phaddd(mmA, anyptr_gpB);
  e->phaddd(xmmA, xmmB);
  e->phaddd(xmmA, anyptr_gpB);
  e->phaddsw(mmA, mmB);
  e->phaddsw(mmA, anyptr_gpB);
  e->phaddsw(xmmA, xmmB);
  e->phaddsw(xmmA, anyptr_gpB);
  e->phsubw(mmA, mmB);
  e->phsubw(mmA, anyptr_gpB);
  e->phsubw(xmmA, xmmB);
  e->phsubw(xmmA, anyptr_gpB);
  e->phsubd(mmA, mmB);
  e->phsubd(mmA, anyptr_gpB);
  e->phsubd(xmmA, xmmB);
  e->phsubd(xmmA, anyptr_gpB);
  e->phsubsw(mmA, mmB);
  e->phsubsw(mmA, anyptr_gpB);
  e->phsubsw(xmmA, xmmB);
  e->phsubsw(xmmA, anyptr_gpB);
  e->pmaddubsw(mmA, mmB);
  e->pmaddubsw(mmA, anyptr_gpB);
  e->pmaddubsw(xmmA, xmmB);
  e->pmaddubsw(xmmA, anyptr_gpB);
  e->pabsb(mmA, mmB);
  e->pabsb(mmA, anyptr_gpB);
  e->pabsb(xmmA, xmmB);
  e->pabsb(xmmA, anyptr_gpB);
  e->pabsw(mmA, mmB);
  e->pabsw(mmA, anyptr_gpB);
  e->pabsw(xmmA, xmmB);
  e->pabsw(xmmA, anyptr_gpB);
  e->pabsd(mmA, mmB);
  e->pabsd(mmA, anyptr_gpB);
  e->pabsd(xmmA, xmmB);
  e->pabsd(xmmA, anyptr_gpB);
  e->pmulhrsw(mmA, mmB);
  e->pmulhrsw(mmA, anyptr_gpB);
  e->pmulhrsw(xmmA, xmmB);
  e->pmulhrsw(xmmA, anyptr_gpB);
  e->pshufb(mmA, mmB);
  e->pshufb(mmA, anyptr_gpB);
  e->pshufb(xmmA, xmmB);
  e->pshufb(xmmA, anyptr_gpB);
  e->palignr(mmA, mmB, 0);
  e->palignr(mmA, anyptr_gpB, 0);
  e->palignr(xmmA, xmmB, 0);
  e->palignr(xmmA, anyptr_gpB, 0);

  // SSE4.1.
  e->nop();

  e->blendpd(xmmA, xmmB, 0);
  e->blendpd(xmmA, anyptr_gpB, 0);
  e->blendps(xmmA, xmmB, 0);
  e->blendps(xmmA, anyptr_gpB, 0);
  e->blendvpd(xmmA, xmmB);                  // Implicit xmmA, xmmB, <XMM0>
  e->blendvpd(xmmA, xmmB, xmm0);            // Explicit xmmA, xmmB, <XMM0>
  e->blendvpd(xmmA, anyptr_gpB);            // Implicit xmmA, mem , <XMM0>
  e->blendvpd(xmmA, anyptr_gpB, xmm0);      // Explicit xmmA, mem , <XMM0>
  e->blendvps(xmmA, xmmB);                  // Implicit xmmA, xmmB, <XMM0>
  e->blendvps(xmmA, xmmB, xmm0);            // Explicit xmmA, xmmB, <XMM0>
  e->blendvps(xmmA, anyptr_gpB);            // Implicit xmmA, mem , <XMM0>
  e->blendvps(xmmA, anyptr_gpB, xmm0);      // Explicit xmmA, mem , <XMM0>

  e->dppd(xmmA, xmmB, 0);
  e->dppd(xmmA, anyptr_gpB, 0);
  e->dpps(xmmA, xmmB, 0);
  e->dpps(xmmA, anyptr_gpB, 0);
  e->extractps(gdA, xmmB, 0);
  e->extractps(gzA, xmmB, 0);
  e->extractps(anyptr_gpA, xmmB, 0);
  e->insertps(xmmA, xmmB, 0);
  e->insertps(xmmA, anyptr_gpB, 0);
  e->movntdqa(xmmA, anyptr_gpB);
  e->mpsadbw(xmmA, xmmB, 0);
  e->mpsadbw(xmmA, anyptr_gpB, 0);
  e->packusdw(xmmA, xmmB);
  e->packusdw(xmmA, anyptr_gpB);
  e->pblendvb(xmmA, xmmB);                  // Implicit xmmA, xmmB, <XMM0>
  e->pblendvb(xmmA, xmmB, xmm0);            // Explicit xmmA, xmmB, <XMM0>
  e->pblendvb(xmmA, anyptr_gpB);            // Implicit xmmA, mem, <XMM0>
  e->pblendvb(xmmA, anyptr_gpB, xmm0);      // Explicit xmmA, mem, <XMM0>
  e->pblendw(xmmA, xmmB, 0);
  e->pblendw(xmmA, anyptr_gpB, 0);
  e->pcmpeqq(xmmA, xmmB);
  e->pcmpeqq(xmmA, anyptr_gpB);
  e->pextrb(gdA, xmmB, 0);
  e->pextrb(gzA, xmmB, 0);
  e->pextrb(anyptr_gpA, xmmB, 0);
  e->pextrd(gdA, xmmB, 0);
  e->pextrd(gzA, xmmB, 0);
  e->pextrd(anyptr_gpA, xmmB, 0);
  if (isX64) e->pextrq(gzA, xmmB, 0);
  if (isX64) e->pextrq(anyptr_gpA, xmmB, 0);
  e->pextrw(gdA, xmmB, 0);
  e->pextrw(gzA, xmmB, 0);
  e->pextrw(anyptr_gpA, xmmB, 0);
  e->phminposuw(xmmA, xmmB);
  e->phminposuw(xmmA, anyptr_gpB);
  e->pinsrb(xmmA, gdB, 0);
  e->pinsrb(xmmA, gzB, 0);
  e->pinsrb(xmmA, anyptr_gpB, 0);
  e->pinsrd(xmmA, gdB, 0);
  e->pinsrd(xmmA, gzB, 0);
  e->pinsrd(xmmA, anyptr_gpB, 0);
  e->pinsrw(xmmA, gdB, 0);
  e->pinsrw(xmmA, gzB, 0);
  e->pinsrw(xmmA, anyptr_gpB, 0);
  e->pmaxuw(xmmA, xmmB);
  e->pmaxuw(xmmA, anyptr_gpB);
  e->pmaxsb(xmmA, xmmB);
  e->pmaxsb(xmmA, anyptr_gpB);
  e->pmaxsd(xmmA, xmmB);
  e->pmaxsd(xmmA, anyptr_gpB);
  e->pmaxud(xmmA, xmmB);
  e->pmaxud(xmmA, anyptr_gpB);
  e->pminsb(xmmA, xmmB);
  e->pminsb(xmmA, anyptr_gpB);
  e->pminuw(xmmA, xmmB);
  e->pminuw(xmmA, anyptr_gpB);
  e->pminud(xmmA, xmmB);
  e->pminud(xmmA, anyptr_gpB);
  e->pminsd(xmmA, xmmB);
  e->pminsd(xmmA, anyptr_gpB);
  e->pmovsxbw(xmmA, xmmB);
  e->pmovsxbw(xmmA, anyptr_gpB);
  e->pmovsxbd(xmmA, xmmB);
  e->pmovsxbd(xmmA, anyptr_gpB);
  e->pmovsxbq(xmmA, xmmB);
  e->pmovsxbq(xmmA, anyptr_gpB);
  e->pmovsxwd(xmmA, xmmB);
  e->pmovsxwd(xmmA, anyptr_gpB);
  e->pmovsxwq(xmmA, xmmB);
  e->pmovsxwq(xmmA, anyptr_gpB);
  e->pmovsxdq(xmmA, xmmB);
  e->pmovsxdq(xmmA, anyptr_gpB);
  e->pmovzxbw(xmmA, xmmB);
  e->pmovzxbw(xmmA, anyptr_gpB);
  e->pmovzxbd(xmmA, xmmB);
  e->pmovzxbd(xmmA, anyptr_gpB);
  e->pmovzxbq(xmmA, xmmB);
  e->pmovzxbq(xmmA, anyptr_gpB);
  e->pmovzxwd(xmmA, xmmB);
  e->pmovzxwd(xmmA, anyptr_gpB);
  e->pmovzxwq(xmmA, xmmB);
  e->pmovzxwq(xmmA, anyptr_gpB);
  e->pmovzxdq(xmmA, xmmB);
  e->pmovzxdq(xmmA, anyptr_gpB);
  e->pmuldq(xmmA, xmmB);
  e->pmuldq(xmmA, anyptr_gpB);
  e->pmulld(xmmA, xmmB);
  e->pmulld(xmmA, anyptr_gpB);
  e->ptest(xmmA, xmmB);
  e->ptest(xmmA, anyptr_gpB);
  e->roundps(xmmA, xmmB, 0);
  e->roundps(xmmA, anyptr_gpB, 0);
  e->roundss(xmmA, xmmB, 0);
  e->roundss(xmmA, anyptr_gpB, 0);
  e->roundpd(xmmA, xmmB, 0);
  e->roundpd(xmmA, anyptr_gpB, 0);
  e->roundsd(xmmA, xmmB, 0);
  e->roundsd(xmmA, anyptr_gpB, 0);

  // SSE4.2.
  e->nop();

  e->pcmpestri(xmmA, xmmB      , imm(0));                 // Implicit xmmA, xmmB, imm, <ECX>, <EAX>, <EDX>
  e->pcmpestri(xmmA, xmmB      , imm(0), ecx, eax, edx);  // Explicit xmmA, xmmB, imm, <ECX>, <EAX>, <EDX>
  e->pcmpestri(xmmA, anyptr_gpB, imm(0));                 // Implicit xmmA, mem , imm, <ECX>, <EAX>, <EDX>
  e->pcmpestri(xmmA, anyptr_gpB, imm(0), ecx, eax, edx);  // Explicit xmmA, mem , imm, <ECX>, <EAX>, <EDX>
  e->pcmpestrm(xmmA, xmmB      , imm(0));                 // Implicit xmmA, xmmB, imm, <XMM0>, <EAX>, <EDX>
  e->pcmpestrm(xmmA, xmmB      , imm(0), xmm0, eax, edx); // Explicit xmmA, xmmB, imm, <XMM0>, <EAX>, <EDX>
  e->pcmpestrm(xmmA, anyptr_gpB, imm(0));                 // Implicit xmmA, mem , imm, <XMM0>, <EAX>, <EDX>
  e->pcmpestrm(xmmA, anyptr_gpB, imm(0), xmm0, eax, edx); // Explicit xmmA, mem , imm, <XMM0>, <EAX>, <EDX>
  e->pcmpistri(xmmA, xmmB      , imm(0));                 // Implicit xmmA, xmmB, imm, <ECX>
  e->pcmpistri(xmmA, xmmB      , imm(0), ecx);            // Explicit xmmA, xmmB, imm, <ECX>
  e->pcmpistri(xmmA, anyptr_gpB, imm(0));                 // Implicit xmmA, mem , imm, <ECX>
  e->pcmpistri(xmmA, anyptr_gpB, imm(0), ecx);            // Explicit xmmA, mem , imm, <ECX>
  e->pcmpistrm(xmmA, xmmB      , imm(0));                 // Implicit xmmA, xmmB, imm, <XMM0>
  e->pcmpistrm(xmmA, xmmB      , imm(0), xmm0);           // Explicit xmmA, xmmB, imm, <XMM0>
  e->pcmpistrm(xmmA, anyptr_gpB, imm(0));                 // Implicit xmmA, mem , imm, <XMM0>
  e->pcmpistrm(xmmA, anyptr_gpB, imm(0), xmm0);           // Explicit xmmA, mem , imm, <XMM0>

  e->pcmpgtq(xmmA, xmmB);
  e->pcmpgtq(xmmA, anyptr_gpB);

  // SSE4A.
  e->nop();

  e->extrq(xmmA, xmmB);
  e->extrq(xmmA, 0x1, 0x2);
  e->extrq(xmmB, 0x1, 0x2);
  e->insertq(xmmA, xmmB);
  e->insertq(xmmA, xmmB, 0x1, 0x2);
  e->movntsd(anyptr_gpA, xmmB);
  e->movntss(anyptr_gpA, xmmB);

  // AESNI.
  e->nop();

  e->aesdec(xmmA, xmmB);
  e->aesdec(xmmA, anyptr_gpB);
  e->aesdeclast(xmmA, xmmB);
  e->aesdeclast(xmmA, anyptr_gpB);
  e->aesenc(xmmA, xmmB);
  e->aesenc(xmmA, anyptr_gpB);
  e->aesenclast(xmmA, xmmB);
  e->aesenclast(xmmA, anyptr_gpB);
  e->aesimc(xmmA, xmmB);
  e->aesimc(xmmA, anyptr_gpB);
  e->aeskeygenassist(xmmA, xmmB, 0);
  e->aeskeygenassist(xmmA, anyptr_gpB, 0);

  // SHA.
  e->nop();

  e->sha1msg1(xmmA, xmmB);
  e->sha1msg1(xmmA, anyptr_gpB);
  e->sha1msg2(xmmA, xmmB);
  e->sha1msg2(xmmA, anyptr_gpB);
  e->sha1nexte(xmmA, xmmB);
  e->sha1nexte(xmmA, anyptr_gpB);
  e->sha1rnds4(xmmA, xmmB, 0);
  e->sha1rnds4(xmmA, anyptr_gpB, 0);
  e->sha256msg1(xmmA, xmmB);
  e->sha256msg1(xmmA, anyptr_gpB);
  e->sha256msg2(xmmA, xmmB);
  e->sha256msg2(xmmA, anyptr_gpB);
  e->sha256rnds2(xmmA, xmmB);               // Implicit xmmA, xmmB, <XMM0>
  e->sha256rnds2(xmmA, xmmB, xmm0);         // Explicit xmmA, xmmB, <XMM0>
  e->sha256rnds2(xmmA, anyptr_gpB);         // Implicit xmmA, mem, <XMM0>
  e->sha256rnds2(xmmA, anyptr_gpB, xmm0);   // Explicit xmmA, mem, <XMM0>

  // PCLMULQDQ.
  e->nop();

  e->pclmulqdq(xmmA, xmmB, 0);
  e->pclmulqdq(xmmA, anyptr_gpB, 0);

  // AVX.
  e->nop();

  e->vaddpd(xmmA, xmmB, xmmC);
  e->vaddpd(xmmA, xmmB, anyptr_gpC);
  e->vaddpd(ymmA, ymmB, ymmC);
  e->vaddpd(ymmA, ymmB, anyptr_gpC);
  e->vaddps(xmmA, xmmB, xmmC);
  e->vaddps(xmmA, xmmB, anyptr_gpC);
  e->vaddps(ymmA, ymmB, ymmC);
  e->vaddps(ymmA, ymmB, anyptr_gpC);
  e->vaddsd(xmmA, xmmB, xmmC);
  e->vaddsd(xmmA, xmmB, anyptr_gpC);
  e->vaddss(xmmA, xmmB, xmmC);
  e->vaddss(xmmA, xmmB, anyptr_gpC);
  e->vaddsubpd(xmmA, xmmB, xmmC);
  e->vaddsubpd(xmmA, xmmB, anyptr_gpC);
  e->vaddsubpd(ymmA, ymmB, ymmC);
  e->vaddsubpd(ymmA, ymmB, anyptr_gpC);
  e->vaddsubps(xmmA, xmmB, xmmC);
  e->vaddsubps(xmmA, xmmB, anyptr_gpC);
  e->vaddsubps(ymmA, ymmB, ymmC);
  e->vaddsubps(ymmA, ymmB, anyptr_gpC);
  e->vandpd(xmmA, xmmB, xmmC);
  e->vandpd(xmmA, xmmB, anyptr_gpC);
  e->vandpd(ymmA, ymmB, ymmC);
  e->vandpd(ymmA, ymmB, anyptr_gpC);
  e->vandps(xmmA, xmmB, xmmC);
  e->vandps(xmmA, xmmB, anyptr_gpC);
  e->vandps(ymmA, ymmB, ymmC);
  e->vandps(ymmA, ymmB, anyptr_gpC);
  e->vandnpd(xmmA, xmmB, xmmC);
  e->vandnpd(xmmA, xmmB, anyptr_gpC);
  e->vandnpd(ymmA, ymmB, ymmC);
  e->vandnpd(ymmA, ymmB, anyptr_gpC);
  e->vandnps(xmmA, xmmB, xmmC);
  e->vandnps(xmmA, xmmB, anyptr_gpC);
  e->vandnps(ymmA, ymmB, ymmC);
  e->vandnps(ymmA, ymmB, anyptr_gpC);
  e->vblendpd(xmmA, xmmB, xmmC, 0);
  e->vblendpd(xmmA, xmmB, anyptr_gpC, 0);
  e->vblendpd(ymmA, ymmB, ymmC, 0);
  e->vblendpd(ymmA, ymmB, anyptr_gpC, 0);
  e->vblendps(xmmA, xmmB, xmmC, 0);
  e->vblendps(xmmA, xmmB, anyptr_gpC, 0);
  e->vblendps(ymmA, ymmB, ymmC, 0);
  e->vblendps(ymmA, ymmB, anyptr_gpC, 0);
  e->vblendvpd(xmmA, xmmB, xmmC, xmmD);
  e->vblendvpd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vblendvpd(ymmA, ymmB, ymmC, ymmD);
  e->vblendvpd(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vbroadcastf128(ymmA, anyptr_gpB);
  e->vbroadcastsd(ymmA, anyptr_gpB);
  e->vbroadcastss(xmmA, anyptr_gpB);
  e->vbroadcastss(ymmA, anyptr_gpB);
  e->vcmppd(xmmA, xmmB, xmmC, 0);
  e->vcmppd(xmmA, xmmB, anyptr_gpC, 0);
  e->vcmppd(ymmA, ymmB, ymmC, 0);
  e->vcmppd(ymmA, ymmB, anyptr_gpC, 0);
  e->vcmpps(xmmA, xmmB, xmmC, 0);
  e->vcmpps(xmmA, xmmB, anyptr_gpC, 0);
  e->vcmpps(ymmA, ymmB, ymmC, 0);
  e->vcmpps(ymmA, ymmB, anyptr_gpC, 0);
  e->vcmpsd(xmmA, xmmB, xmmC, 0);
  e->vcmpsd(xmmA, xmmB, anyptr_gpC, 0);
  e->vcmpss(xmmA, xmmB, xmmC, 0);
  e->vcmpss(xmmA, xmmB, anyptr_gpC, 0);
  e->vcomisd(xmmA, xmmB);
  e->vcomisd(xmmA, anyptr_gpB);
  e->vcomiss(xmmA, xmmB);
  e->vcomiss(xmmA, anyptr_gpB);
  e->vcvtdq2pd(xmmA, xmmB);
  e->vcvtdq2pd(xmmA, anyptr_gpB);
  e->vcvtdq2pd(ymmA, xmmB);
  e->vcvtdq2pd(ymmA, anyptr_gpB);
  e->vcvtdq2ps(xmmA, xmmB);
  e->vcvtdq2ps(xmmA, anyptr_gpB);
  e->vcvtdq2ps(ymmA, ymmB);
  e->vcvtdq2ps(ymmA, anyptr_gpB);
  e->vcvtpd2dq(xmmA, xmmB);
  e->vcvtpd2dq(xmmA, ymmB);
  e->vcvtpd2dq(xmmA, anyptr_gpB);
  e->vcvtpd2ps(xmmA, xmmB);
  e->vcvtpd2ps(xmmA, ymmB);
  e->vcvtpd2ps(xmmA, anyptr_gpB);
  e->vcvtps2dq(xmmA, xmmB);
  e->vcvtps2dq(xmmA, anyptr_gpB);
  e->vcvtps2dq(ymmA, ymmB);
  e->vcvtps2dq(ymmA, anyptr_gpB);
  e->vcvtps2pd(xmmA, xmmB);
  e->vcvtps2pd(xmmA, anyptr_gpB);
  e->vcvtps2pd(ymmA, xmmB);
  e->vcvtps2pd(ymmA, anyptr_gpB);
  e->vcvtsd2si(gzA, xmmB);
  e->vcvtsd2si(gzA, anyptr_gpB);
  e->vcvtsd2ss(xmmA, xmmB, xmmC);
  e->vcvtsd2ss(xmmA, xmmB, anyptr_gpC);
  e->vcvtsi2sd(xmmA, xmmB, gzC);
  e->vcvtsi2sd(xmmA, xmmB, anyptr_gpC);
  e->vcvtsi2ss(xmmA, xmmB, gzC);
  e->vcvtsi2ss(xmmA, xmmB, anyptr_gpC);
  e->vcvtss2sd(xmmA, xmmB, xmmC);
  e->vcvtss2sd(xmmA, xmmB, anyptr_gpC);
  e->vcvtss2si(gzA, xmmB);
  e->vcvtss2si(gzA, anyptr_gpB);
  e->vcvttpd2dq(xmmA, xmmB);
  e->vcvttpd2dq(xmmA, ymmB);
  e->vcvttpd2dq(xmmA, anyptr_gpB);
  e->vcvttps2dq(xmmA, xmmB);
  e->vcvttps2dq(xmmA, anyptr_gpB);
  e->vcvttps2dq(ymmA, ymmB);
  e->vcvttps2dq(ymmA, anyptr_gpB);
  e->vcvttsd2si(gzA, xmmB);
  e->vcvttsd2si(gzA, anyptr_gpB);
  e->vcvttss2si(gzA, xmmB);
  e->vcvttss2si(gzA, anyptr_gpB);
  e->vdivpd(xmmA, xmmB, xmmC);
  e->vdivpd(xmmA, xmmB, anyptr_gpC);
  e->vdivpd(ymmA, ymmB, ymmC);
  e->vdivpd(ymmA, ymmB, anyptr_gpC);
  e->vdivps(xmmA, xmmB, xmmC);
  e->vdivps(xmmA, xmmB, anyptr_gpC);
  e->vdivps(ymmA, ymmB, ymmC);
  e->vdivps(ymmA, ymmB, anyptr_gpC);
  e->vdivsd(xmmA, xmmB, xmmC);
  e->vdivsd(xmmA, xmmB, anyptr_gpC);
  e->vdivss(xmmA, xmmB, xmmC);
  e->vdivss(xmmA, xmmB, anyptr_gpC);
  e->vdppd(xmmA, xmmB, xmmC, 0);
  e->vdppd(xmmA, xmmB, anyptr_gpC, 0);
  e->vdpps(xmmA, xmmB, xmmC, 0);
  e->vdpps(xmmA, xmmB, anyptr_gpC, 0);
  e->vdpps(ymmA, ymmB, ymmC, 0);
  e->vdpps(ymmA, ymmB, anyptr_gpC, 0);
  e->vextractf128(xmmA, ymmB, 0);
  e->vextractf128(anyptr_gpA, ymmB, 0);
  e->vextractps(gzA, xmmB, 0);
  e->vextractps(anyptr_gpA, xmmB, 0);
  e->vhaddpd(xmmA, xmmB, xmmC);
  e->vhaddpd(xmmA, xmmB, anyptr_gpC);
  e->vhaddpd(ymmA, ymmB, ymmC);
  e->vhaddpd(ymmA, ymmB, anyptr_gpC);
  e->vhaddps(xmmA, xmmB, xmmC);
  e->vhaddps(xmmA, xmmB, anyptr_gpC);
  e->vhaddps(ymmA, ymmB, ymmC);
  e->vhaddps(ymmA, ymmB, anyptr_gpC);
  e->vhsubpd(xmmA, xmmB, xmmC);
  e->vhsubpd(xmmA, xmmB, anyptr_gpC);
  e->vhsubpd(ymmA, ymmB, ymmC);
  e->vhsubpd(ymmA, ymmB, anyptr_gpC);
  e->vhsubps(xmmA, xmmB, xmmC);
  e->vhsubps(xmmA, xmmB, anyptr_gpC);
  e->vhsubps(ymmA, ymmB, ymmC);
  e->vhsubps(ymmA, ymmB, anyptr_gpC);
  e->vinsertf128(ymmA, ymmB, xmmC, 0);
  e->vinsertf128(ymmA, ymmB, anyptr_gpC, 0);
  e->vinsertps(xmmA, xmmB, xmmC, 0);
  e->vinsertps(xmmA, xmmB, anyptr_gpC, 0);
  e->vlddqu(xmmA, anyptr_gpB);
  e->vlddqu(ymmA, anyptr_gpB);
  e->vldmxcsr(anyptr_gpA);
  e->vmaskmovdqu(xmmA, xmmB);                // Implicit xmmA, xmmB, <ds:[EDI|RDI]>
  e->vmaskmovdqu(xmmA, xmmB, ptr(e->zdi())); // Explicit xmmA, xmmB, <ds:[EDI|RDI]>
  e->vmaskmovps(xmmA, xmmB, anyptr_gpC);
  e->vmaskmovps(ymmA, ymmB, anyptr_gpC);
  e->vmaskmovps(anyptr_gpA, xmmB, xmmC);
  e->vmaskmovps(anyptr_gpA, ymmB, ymmC);
  e->vmaskmovpd(xmmA, xmmB, anyptr_gpC);
  e->vmaskmovpd(ymmA, ymmB, anyptr_gpC);
  e->vmaskmovpd(anyptr_gpA, xmmB, xmmC);
  e->vmaskmovpd(anyptr_gpA, ymmB, ymmC);
  e->vmaxpd(xmmA, xmmB, xmmC);
  e->vmaxpd(xmmA, xmmB, anyptr_gpC);
  e->vmaxpd(ymmA, ymmB, ymmC);
  e->vmaxpd(ymmA, ymmB, anyptr_gpC);
  e->vmaxps(xmmA, xmmB, xmmC);
  e->vmaxps(xmmA, xmmB, anyptr_gpC);
  e->vmaxps(ymmA, ymmB, ymmC);
  e->vmaxps(ymmA, ymmB, anyptr_gpC);
  e->vmaxsd(xmmA, xmmB, xmmC);
  e->vmaxsd(xmmA, xmmB, anyptr_gpC);
  e->vmaxss(xmmA, xmmB, xmmC);
  e->vmaxss(xmmA, xmmB, anyptr_gpC);
  e->vminpd(xmmA, xmmB, xmmC);
  e->vminpd(xmmA, xmmB, anyptr_gpC);
  e->vminpd(ymmA, ymmB, ymmC);
  e->vminpd(ymmA, ymmB, anyptr_gpC);
  e->vminps(xmmA, xmmB, xmmC);
  e->vminps(xmmA, xmmB, anyptr_gpC);
  e->vminps(ymmA, ymmB, ymmC);
  e->vminps(ymmA, ymmB, anyptr_gpC);
  e->vminsd(xmmA, xmmB, xmmC);
  e->vminsd(xmmA, xmmB, anyptr_gpC);
  e->vminss(xmmA, xmmB, xmmC);
  e->vminss(xmmA, xmmB, anyptr_gpC);
  e->vmovapd(xmmA, xmmB);
  e->vmovapd(xmmA, anyptr_gpB);
  e->vmovapd(anyptr_gpA, xmmB);
  e->vmovapd(ymmA, ymmB);
  e->vmovapd(ymmA, anyptr_gpB);
  e->vmovapd(anyptr_gpA, ymmB);
  e->vmovaps(xmmA, xmmB);
  e->vmovaps(xmmA, anyptr_gpB);
  e->vmovaps(anyptr_gpA, xmmB);
  e->vmovaps(ymmA, ymmB);
  e->vmovaps(ymmA, anyptr_gpB);
  e->vmovaps(anyptr_gpA, ymmB);
  e->vmovd(xmmA, gzB);
  e->vmovd(xmmA, anyptr_gpB);
  e->vmovd(gzA, xmmB);
  e->vmovd(anyptr_gpA, xmmB);
  e->vmovddup(xmmA, xmmB);
  e->vmovddup(xmmA, anyptr_gpB);
  e->vmovddup(ymmA, ymmB);
  e->vmovddup(ymmA, anyptr_gpB);
  e->vmovdqa(xmmA, xmmB);
  e->vmovdqa(xmmA, anyptr_gpB);
  e->vmovdqa(anyptr_gpA, xmmB);
  e->vmovdqa(ymmA, ymmB);
  e->vmovdqa(ymmA, anyptr_gpB);
  e->vmovdqa(anyptr_gpA, ymmB);
  e->vmovdqu(xmmA, xmmB);
  e->vmovdqu(xmmA, anyptr_gpB);
  e->vmovdqu(anyptr_gpA, xmmB);
  e->vmovdqu(ymmA, ymmB);
  e->vmovdqu(ymmA, anyptr_gpB);
  e->vmovdqu(anyptr_gpA, ymmB);
  e->vmovhlps(xmmA, xmmB, xmmC);
  e->vmovhpd(xmmA, xmmB, anyptr_gpC);
  e->vmovhpd(anyptr_gpA, xmmB);
  e->vmovhps(xmmA, xmmB, anyptr_gpC);
  e->vmovhps(anyptr_gpA, xmmB);
  e->vmovlhps(xmmA, xmmB, xmmC);
  e->vmovlpd(xmmA, xmmB, anyptr_gpC);
  e->vmovlpd(anyptr_gpA, xmmB);
  e->vmovlps(xmmA, xmmB, anyptr_gpC);
  e->vmovlps(anyptr_gpA, xmmB);
  e->vmovmskpd(gzA, xmmB);
  e->vmovmskpd(gzA, ymmB);
  e->vmovmskps(gzA, xmmB);
  e->vmovmskps(gzA, ymmB);
  e->vmovntdq(anyptr_gpA, xmmB);
  e->vmovntdq(anyptr_gpA, ymmB);
  e->vmovntdqa(xmmA, anyptr_gpB);
  e->vmovntpd(anyptr_gpA, xmmB);
  e->vmovntpd(anyptr_gpA, ymmB);
  e->vmovntps(anyptr_gpA, xmmB);
  e->vmovntps(anyptr_gpA, ymmB);
  e->vmovsd(xmmA, xmmB, xmmC);
  e->vmovsd(xmmA, anyptr_gpB);
  e->vmovsd(anyptr_gpA, xmmB);
  e->vmovshdup(xmmA, xmmB);
  e->vmovshdup(xmmA, anyptr_gpB);
  e->vmovshdup(ymmA, ymmB);
  e->vmovshdup(ymmA, anyptr_gpB);
  e->vmovsldup(xmmA, xmmB);
  e->vmovsldup(xmmA, anyptr_gpB);
  e->vmovsldup(ymmA, ymmB);
  e->vmovsldup(ymmA, anyptr_gpB);
  e->vmovss(xmmA, xmmB, xmmC);
  e->vmovss(xmmA, anyptr_gpB);
  e->vmovss(anyptr_gpA, xmmB);
  e->vmovupd(xmmA, xmmB);
  e->vmovupd(xmmA, anyptr_gpB);
  e->vmovupd(anyptr_gpA, xmmB);
  e->vmovupd(ymmA, ymmB);
  e->vmovupd(ymmA, anyptr_gpB);
  e->vmovupd(anyptr_gpA, ymmB);
  e->vmovups(xmmA, xmmB);
  e->vmovups(xmmA, anyptr_gpB);
  e->vmovups(anyptr_gpA, xmmB);
  e->vmovups(ymmA, ymmB);
  e->vmovups(ymmA, anyptr_gpB);
  e->vmovups(anyptr_gpA, ymmB);
  e->vmpsadbw(xmmA, xmmB, xmmC, 0);
  e->vmpsadbw(xmmA, xmmB, anyptr_gpC, 0);
  e->vmulpd(xmmA, xmmB, xmmC);
  e->vmulpd(xmmA, xmmB, anyptr_gpC);
  e->vmulpd(ymmA, ymmB, ymmC);
  e->vmulpd(ymmA, ymmB, anyptr_gpC);
  e->vmulps(xmmA, xmmB, xmmC);
  e->vmulps(xmmA, xmmB, anyptr_gpC);
  e->vmulps(ymmA, ymmB, ymmC);
  e->vmulps(ymmA, ymmB, anyptr_gpC);
  e->vmulsd(xmmA, xmmB, xmmC);
  e->vmulsd(xmmA, xmmB, anyptr_gpC);
  e->vmulss(xmmA, xmmB, xmmC);
  e->vmulss(xmmA, xmmB, anyptr_gpC);
  e->vorpd(xmmA, xmmB, xmmC);
  e->vorpd(xmmA, xmmB, anyptr_gpC);
  e->vorpd(ymmA, ymmB, ymmC);
  e->vorpd(ymmA, ymmB, anyptr_gpC);
  e->vorps(xmmA, xmmB, xmmC);
  e->vorps(xmmA, xmmB, anyptr_gpC);
  e->vorps(ymmA, ymmB, ymmC);
  e->vorps(ymmA, ymmB, anyptr_gpC);
  e->vpabsb(xmmA, xmmB);
  e->vpabsb(xmmA, anyptr_gpB);
  e->vpabsd(xmmA, xmmB);
  e->vpabsd(xmmA, anyptr_gpB);
  e->vpabsw(xmmA, xmmB);
  e->vpabsw(xmmA, anyptr_gpB);
  e->vpackssdw(xmmA, xmmB, xmmC);
  e->vpackssdw(xmmA, xmmB, anyptr_gpC);
  e->vpacksswb(xmmA, xmmB, xmmC);
  e->vpacksswb(xmmA, xmmB, anyptr_gpC);
  e->vpackusdw(xmmA, xmmB, xmmC);
  e->vpackusdw(xmmA, xmmB, anyptr_gpC);
  e->vpackuswb(xmmA, xmmB, xmmC);
  e->vpackuswb(xmmA, xmmB, anyptr_gpC);
  e->vpaddb(xmmA, xmmB, xmmC);
  e->vpaddb(xmmA, xmmB, anyptr_gpC);
  e->vpaddd(xmmA, xmmB, xmmC);
  e->vpaddd(xmmA, xmmB, anyptr_gpC);
  e->vpaddq(xmmA, xmmB, xmmC);
  e->vpaddq(xmmA, xmmB, anyptr_gpC);
  e->vpaddw(xmmA, xmmB, xmmC);
  e->vpaddw(xmmA, xmmB, anyptr_gpC);
  e->vpaddsb(xmmA, xmmB, xmmC);
  e->vpaddsb(xmmA, xmmB, anyptr_gpC);
  e->vpaddsw(xmmA, xmmB, xmmC);
  e->vpaddsw(xmmA, xmmB, anyptr_gpC);
  e->vpaddusb(xmmA, xmmB, xmmC);
  e->vpaddusb(xmmA, xmmB, anyptr_gpC);
  e->vpaddusw(xmmA, xmmB, xmmC);
  e->vpaddusw(xmmA, xmmB, anyptr_gpC);
  e->vpalignr(xmmA, xmmB, xmmC, 0);
  e->vpalignr(xmmA, xmmB, anyptr_gpC, 0);
  e->vpand(xmmA, xmmB, xmmC);
  e->vpand(xmmA, xmmB, anyptr_gpC);
  e->vpandn(xmmA, xmmB, xmmC);
  e->vpandn(xmmA, xmmB, anyptr_gpC);
  e->vpavgb(xmmA, xmmB, xmmC);
  e->vpavgb(xmmA, xmmB, anyptr_gpC);
  e->vpavgw(xmmA, xmmB, xmmC);
  e->vpavgw(xmmA, xmmB, anyptr_gpC);
  e->vpblendvb(xmmA, xmmB, xmmC, xmmD);
  e->vpblendvb(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpblendw(xmmA, xmmB, xmmC, 0);
  e->vpblendw(xmmA, xmmB, anyptr_gpC, 0);
  e->vpcmpeqb(xmmA, xmmB, xmmC);
  e->vpcmpeqb(xmmA, xmmB, anyptr_gpC);
  e->vpcmpeqd(xmmA, xmmB, xmmC);
  e->vpcmpeqd(xmmA, xmmB, anyptr_gpC);
  e->vpcmpeqq(xmmA, xmmB, xmmC);
  e->vpcmpeqq(xmmA, xmmB, anyptr_gpC);
  e->vpcmpeqw(xmmA, xmmB, xmmC);
  e->vpcmpeqw(xmmA, xmmB, anyptr_gpC);
  e->vpcmpgtb(xmmA, xmmB, xmmC);
  e->vpcmpgtb(xmmA, xmmB, anyptr_gpC);
  e->vpcmpgtd(xmmA, xmmB, xmmC);
  e->vpcmpgtd(xmmA, xmmB, anyptr_gpC);
  e->vpcmpgtq(xmmA, xmmB, xmmC);
  e->vpcmpgtq(xmmA, xmmB, anyptr_gpC);
  e->vpcmpgtw(xmmA, xmmB, xmmC);
  e->vpcmpgtw(xmmA, xmmB, anyptr_gpC);
  e->vpcmpestri(xmmA, xmmB, 0);
  e->vpcmpestri(xmmA, anyptr_gpB, 0);
  e->vpcmpestrm(xmmA, xmmB, 0);
  e->vpcmpestrm(xmmA, anyptr_gpB, 0);
  e->vpcmpistri(xmmA, xmmB, 0);
  e->vpcmpistri(xmmA, anyptr_gpB, 0);
  e->vpcmpistrm(xmmA, xmmB, 0);
  e->vpcmpistrm(xmmA, anyptr_gpB, 0);
  e->vpermilpd(xmmA, xmmB, xmmC);
  e->vpermilpd(xmmA, xmmB, anyptr_gpC);
  e->vpermilpd(ymmA, ymmB, ymmC);
  e->vpermilpd(ymmA, ymmB, anyptr_gpC);
  e->vpermilpd(xmmA, xmmB, 0);
  e->vpermilpd(xmmA, anyptr_gpB, 0);
  e->vpermilpd(ymmA, ymmB, 0);
  e->vpermilpd(ymmA, anyptr_gpB, 0);
  e->vpermilps(xmmA, xmmB, xmmC);
  e->vpermilps(xmmA, xmmB, anyptr_gpC);
  e->vpermilps(ymmA, ymmB, ymmC);
  e->vpermilps(ymmA, ymmB, anyptr_gpC);
  e->vpermilps(xmmA, xmmB, 0);
  e->vpermilps(xmmA, anyptr_gpB, 0);
  e->vpermilps(ymmA, ymmB, 0);
  e->vpermilps(ymmA, anyptr_gpB, 0);
  e->vperm2f128(ymmA, ymmB, ymmC, 0);
  e->vperm2f128(ymmA, ymmB, anyptr_gpC, 0);
  e->vpextrb(gzA, xmmB, 0);
  e->vpextrb(anyptr_gpA, xmmB, 0);
  e->vpextrd(gzA, xmmB, 0);
  e->vpextrd(anyptr_gpA, xmmB, 0);
  if (isX64) e->vpextrq(gzA, xmmB, 0);
  if (isX64) e->vpextrq(anyptr_gpA, xmmB, 0);
  e->vpextrw(gzA, xmmB, 0);
  e->vpextrw(anyptr_gpA, xmmB, 0);
  e->vphaddd(xmmA, xmmB, xmmC);
  e->vphaddd(xmmA, xmmB, anyptr_gpC);
  e->vphaddsw(xmmA, xmmB, xmmC);
  e->vphaddsw(xmmA, xmmB, anyptr_gpC);
  e->vphaddw(xmmA, xmmB, xmmC);
  e->vphaddw(xmmA, xmmB, anyptr_gpC);
  e->vphminposuw(xmmA, xmmB);
  e->vphminposuw(xmmA, anyptr_gpB);
  e->vphsubd(xmmA, xmmB, xmmC);
  e->vphsubd(xmmA, xmmB, anyptr_gpC);
  e->vphsubsw(xmmA, xmmB, xmmC);
  e->vphsubsw(xmmA, xmmB, anyptr_gpC);
  e->vphsubw(xmmA, xmmB, xmmC);
  e->vphsubw(xmmA, xmmB, anyptr_gpC);
  e->vpinsrb(xmmA, xmmB, gzC, 0);
  e->vpinsrb(xmmA, xmmB, anyptr_gpC, 0);
  e->vpinsrd(xmmA, xmmB, gzC, 0);
  e->vpinsrd(xmmA, xmmB, anyptr_gpC, 0);
  e->vpinsrw(xmmA, xmmB, gzC, 0);
  e->vpinsrw(xmmA, xmmB, anyptr_gpC, 0);
  e->vpmaddubsw(xmmA, xmmB, xmmC);
  e->vpmaddubsw(xmmA, xmmB, anyptr_gpC);
  e->vpmaddwd(xmmA, xmmB, xmmC);
  e->vpmaddwd(xmmA, xmmB, anyptr_gpC);
  e->vpmaxsb(xmmA, xmmB, xmmC);
  e->vpmaxsb(xmmA, xmmB, anyptr_gpC);
  e->vpmaxsd(xmmA, xmmB, xmmC);
  e->vpmaxsd(xmmA, xmmB, anyptr_gpC);
  e->vpmaxsw(xmmA, xmmB, xmmC);
  e->vpmaxsw(xmmA, xmmB, anyptr_gpC);
  e->vpmaxub(xmmA, xmmB, xmmC);
  e->vpmaxub(xmmA, xmmB, anyptr_gpC);
  e->vpmaxud(xmmA, xmmB, xmmC);
  e->vpmaxud(xmmA, xmmB, anyptr_gpC);
  e->vpmaxuw(xmmA, xmmB, xmmC);
  e->vpmaxuw(xmmA, xmmB, anyptr_gpC);
  e->vpminsb(xmmA, xmmB, xmmC);
  e->vpminsb(xmmA, xmmB, anyptr_gpC);
  e->vpminsd(xmmA, xmmB, xmmC);
  e->vpminsd(xmmA, xmmB, anyptr_gpC);
  e->vpminsw(xmmA, xmmB, xmmC);
  e->vpminsw(xmmA, xmmB, anyptr_gpC);
  e->vpminub(xmmA, xmmB, xmmC);
  e->vpminub(xmmA, xmmB, anyptr_gpC);
  e->vpminud(xmmA, xmmB, xmmC);
  e->vpminud(xmmA, xmmB, anyptr_gpC);
  e->vpminuw(xmmA, xmmB, xmmC);
  e->vpminuw(xmmA, xmmB, anyptr_gpC);
  e->vpmovmskb(gzA, xmmB);
  e->vpmovsxbd(xmmA, xmmB);
  e->vpmovsxbd(xmmA, anyptr_gpB);
  e->vpmovsxbq(xmmA, xmmB);
  e->vpmovsxbq(xmmA, anyptr_gpB);
  e->vpmovsxbw(xmmA, xmmB);
  e->vpmovsxbw(xmmA, anyptr_gpB);
  e->vpmovsxdq(xmmA, xmmB);
  e->vpmovsxdq(xmmA, anyptr_gpB);
  e->vpmovsxwd(xmmA, xmmB);
  e->vpmovsxwd(xmmA, anyptr_gpB);
  e->vpmovsxwq(xmmA, xmmB);
  e->vpmovsxwq(xmmA, anyptr_gpB);
  e->vpmovzxbd(xmmA, xmmB);
  e->vpmovzxbd(xmmA, anyptr_gpB);
  e->vpmovzxbq(xmmA, xmmB);
  e->vpmovzxbq(xmmA, anyptr_gpB);
  e->vpmovzxbw(xmmA, xmmB);
  e->vpmovzxbw(xmmA, anyptr_gpB);
  e->vpmovzxdq(xmmA, xmmB);
  e->vpmovzxdq(xmmA, anyptr_gpB);
  e->vpmovzxwd(xmmA, xmmB);
  e->vpmovzxwd(xmmA, anyptr_gpB);
  e->vpmovzxwq(xmmA, xmmB);
  e->vpmovzxwq(xmmA, anyptr_gpB);
  e->vpmuldq(xmmA, xmmB, xmmC);
  e->vpmuldq(xmmA, xmmB, anyptr_gpC);
  e->vpmulhrsw(xmmA, xmmB, xmmC);
  e->vpmulhrsw(xmmA, xmmB, anyptr_gpC);
  e->vpmulhuw(xmmA, xmmB, xmmC);
  e->vpmulhuw(xmmA, xmmB, anyptr_gpC);
  e->vpmulhw(xmmA, xmmB, xmmC);
  e->vpmulhw(xmmA, xmmB, anyptr_gpC);
  e->vpmulld(xmmA, xmmB, xmmC);
  e->vpmulld(xmmA, xmmB, anyptr_gpC);
  e->vpmullw(xmmA, xmmB, xmmC);
  e->vpmullw(xmmA, xmmB, anyptr_gpC);
  e->vpmuludq(xmmA, xmmB, xmmC);
  e->vpmuludq(xmmA, xmmB, anyptr_gpC);
  e->vpor(xmmA, xmmB, xmmC);
  e->vpor(xmmA, xmmB, anyptr_gpC);
  e->vpsadbw(xmmA, xmmB, xmmC);
  e->vpsadbw(xmmA, xmmB, anyptr_gpC);
  e->vpshufb(xmmA, xmmB, xmmC);
  e->vpshufb(xmmA, xmmB, anyptr_gpC);
  e->vpshufd(xmmA, xmmB, 0);
  e->vpshufd(xmmA, anyptr_gpB, 0);
  e->vpshufhw(xmmA, xmmB, 0);
  e->vpshufhw(xmmA, anyptr_gpB, 0);
  e->vpshuflw(xmmA, xmmB, 0);
  e->vpshuflw(xmmA, anyptr_gpB, 0);
  e->vpsignb(xmmA, xmmB, xmmC);
  e->vpsignb(xmmA, xmmB, anyptr_gpC);
  e->vpsignd(xmmA, xmmB, xmmC);
  e->vpsignd(xmmA, xmmB, anyptr_gpC);
  e->vpsignw(xmmA, xmmB, xmmC);
  e->vpsignw(xmmA, xmmB, anyptr_gpC);
  e->vpslld(xmmA, xmmB, xmmC);
  e->vpslld(xmmA, xmmB, anyptr_gpC);
  e->vpslld(xmmA, xmmB, 0);
  e->vpslldq(xmmA, xmmB, 0);
  e->vpsllq(xmmA, xmmB, xmmC);
  e->vpsllq(xmmA, xmmB, anyptr_gpC);
  e->vpsllq(xmmA, xmmB, 0);
  e->vpsllw(xmmA, xmmB, xmmC);
  e->vpsllw(xmmA, xmmB, anyptr_gpC);
  e->vpsllw(xmmA, xmmB, 0);
  e->vpsrad(xmmA, xmmB, xmmC);
  e->vpsrad(xmmA, xmmB, anyptr_gpC);
  e->vpsrad(xmmA, xmmB, 0);
  e->vpsraw(xmmA, xmmB, xmmC);
  e->vpsraw(xmmA, xmmB, anyptr_gpC);
  e->vpsraw(xmmA, xmmB, 0);
  e->vpsrld(xmmA, xmmB, xmmC);
  e->vpsrld(xmmA, xmmB, anyptr_gpC);
  e->vpsrld(xmmA, xmmB, 0);
  e->vpsrldq(xmmA, xmmB, 0);
  e->vpsrlq(xmmA, xmmB, xmmC);
  e->vpsrlq(xmmA, xmmB, anyptr_gpC);
  e->vpsrlq(xmmA, xmmB, 0);
  e->vpsrlw(xmmA, xmmB, xmmC);
  e->vpsrlw(xmmA, xmmB, anyptr_gpC);
  e->vpsrlw(xmmA, xmmB, 0);
  e->vpsubb(xmmA, xmmB, xmmC);
  e->vpsubb(xmmA, xmmB, anyptr_gpC);
  e->vpsubd(xmmA, xmmB, xmmC);
  e->vpsubd(xmmA, xmmB, anyptr_gpC);
  e->vpsubq(xmmA, xmmB, xmmC);
  e->vpsubq(xmmA, xmmB, anyptr_gpC);
  e->vpsubw(xmmA, xmmB, xmmC);
  e->vpsubw(xmmA, xmmB, anyptr_gpC);
  e->vpsubsb(xmmA, xmmB, xmmC);
  e->vpsubsb(xmmA, xmmB, anyptr_gpC);
  e->vpsubsw(xmmA, xmmB, xmmC);
  e->vpsubsw(xmmA, xmmB, anyptr_gpC);
  e->vpsubusb(xmmA, xmmB, xmmC);
  e->vpsubusb(xmmA, xmmB, anyptr_gpC);
  e->vpsubusw(xmmA, xmmB, xmmC);
  e->vpsubusw(xmmA, xmmB, anyptr_gpC);
  e->vptest(xmmA, xmmB);
  e->vptest(xmmA, anyptr_gpB);
  e->vptest(ymmA, ymmB);
  e->vptest(ymmA, anyptr_gpB);
  e->vpunpckhbw(xmmA, xmmB, xmmC);
  e->vpunpckhbw(xmmA, xmmB, anyptr_gpC);
  e->vpunpckhdq(xmmA, xmmB, xmmC);
  e->vpunpckhdq(xmmA, xmmB, anyptr_gpC);
  e->vpunpckhqdq(xmmA, xmmB, xmmC);
  e->vpunpckhqdq(xmmA, xmmB, anyptr_gpC);
  e->vpunpckhwd(xmmA, xmmB, xmmC);
  e->vpunpckhwd(xmmA, xmmB, anyptr_gpC);
  e->vpunpcklbw(xmmA, xmmB, xmmC);
  e->vpunpcklbw(xmmA, xmmB, anyptr_gpC);
  e->vpunpckldq(xmmA, xmmB, xmmC);
  e->vpunpckldq(xmmA, xmmB, anyptr_gpC);
  e->vpunpcklqdq(xmmA, xmmB, xmmC);
  e->vpunpcklqdq(xmmA, xmmB, anyptr_gpC);
  e->vpunpcklwd(xmmA, xmmB, xmmC);
  e->vpunpcklwd(xmmA, xmmB, anyptr_gpC);
  e->vpxor(xmmA, xmmB, xmmC);
  e->vpxor(xmmA, xmmB, anyptr_gpC);
  e->vrcpps(xmmA, xmmB);
  e->vrcpps(xmmA, anyptr_gpB);
  e->vrcpps(ymmA, ymmB);
  e->vrcpps(ymmA, anyptr_gpB);
  e->vrcpss(xmmA, xmmB, xmmC);
  e->vrcpss(xmmA, xmmB, anyptr_gpC);
  e->vrsqrtps(xmmA, xmmB);
  e->vrsqrtps(xmmA, anyptr_gpB);
  e->vrsqrtps(ymmA, ymmB);
  e->vrsqrtps(ymmA, anyptr_gpB);
  e->vrsqrtss(xmmA, xmmB, xmmC);
  e->vrsqrtss(xmmA, xmmB, anyptr_gpC);
  e->vroundpd(xmmA, xmmB, 0);
  e->vroundpd(xmmA, anyptr_gpB, 0);
  e->vroundpd(ymmA, ymmB, 0);
  e->vroundpd(ymmA, anyptr_gpB, 0);
  e->vroundps(xmmA, xmmB, 0);
  e->vroundps(xmmA, anyptr_gpB, 0);
  e->vroundps(ymmA, ymmB, 0);
  e->vroundps(ymmA, anyptr_gpB, 0);
  e->vroundsd(xmmA, xmmB, xmmC, 0);
  e->vroundsd(xmmA, xmmB, anyptr_gpC, 0);
  e->vroundss(xmmA, xmmB, xmmC, 0);
  e->vroundss(xmmA, xmmB, anyptr_gpC, 0);
  e->vshufpd(xmmA, xmmB, xmmC, 0);
  e->vshufpd(xmmA, xmmB, anyptr_gpC, 0);
  e->vshufpd(ymmA, ymmB, ymmC, 0);
  e->vshufpd(ymmA, ymmB, anyptr_gpC, 0);
  e->vshufps(xmmA, xmmB, xmmC, 0);
  e->vshufps(xmmA, xmmB, anyptr_gpC, 0);
  e->vshufps(ymmA, ymmB, ymmC, 0);
  e->vshufps(ymmA, ymmB, anyptr_gpC, 0);
  e->vsqrtpd(xmmA, xmmB);
  e->vsqrtpd(xmmA, anyptr_gpB);
  e->vsqrtpd(ymmA, ymmB);
  e->vsqrtpd(ymmA, anyptr_gpB);
  e->vsqrtps(xmmA, xmmB);
  e->vsqrtps(xmmA, anyptr_gpB);
  e->vsqrtps(ymmA, ymmB);
  e->vsqrtps(ymmA, anyptr_gpB);
  e->vsqrtsd(xmmA, xmmB, xmmC);
  e->vsqrtsd(xmmA, xmmB, anyptr_gpC);
  e->vsqrtss(xmmA, xmmB, xmmC);
  e->vsqrtss(xmmA, xmmB, anyptr_gpC);
  e->vstmxcsr(anyptr_gpA);
  e->vsubpd(xmmA, xmmB, xmmC);
  e->vsubpd(xmmA, xmmB, anyptr_gpC);
  e->vsubpd(ymmA, ymmB, ymmC);
  e->vsubpd(ymmA, ymmB, anyptr_gpC);
  e->vsubps(xmmA, xmmB, xmmC);
  e->vsubps(xmmA, xmmB, anyptr_gpC);
  e->vsubps(ymmA, ymmB, ymmC);
  e->vsubps(ymmA, ymmB, anyptr_gpC);
  e->vsubsd(xmmA, xmmB, xmmC);
  e->vsubsd(xmmA, xmmB, anyptr_gpC);
  e->vsubss(xmmA, xmmB, xmmC);
  e->vsubss(xmmA, xmmB, anyptr_gpC);
  e->vtestps(xmmA, xmmB);
  e->vtestps(xmmA, anyptr_gpB);
  e->vtestps(ymmA, ymmB);
  e->vtestps(ymmA, anyptr_gpB);
  e->vtestpd(xmmA, xmmB);
  e->vtestpd(xmmA, anyptr_gpB);
  e->vtestpd(ymmA, ymmB);
  e->vtestpd(ymmA, anyptr_gpB);
  e->vucomisd(xmmA, xmmB);
  e->vucomisd(xmmA, anyptr_gpB);
  e->vucomiss(xmmA, xmmB);
  e->vucomiss(xmmA, anyptr_gpB);
  e->vunpckhpd(xmmA, xmmB, xmmC);
  e->vunpckhpd(xmmA, xmmB, anyptr_gpC);
  e->vunpckhpd(ymmA, ymmB, ymmC);
  e->vunpckhpd(ymmA, ymmB, anyptr_gpC);
  e->vunpckhps(xmmA, xmmB, xmmC);
  e->vunpckhps(xmmA, xmmB, anyptr_gpC);
  e->vunpckhps(ymmA, ymmB, ymmC);
  e->vunpckhps(ymmA, ymmB, anyptr_gpC);
  e->vunpcklpd(xmmA, xmmB, xmmC);
  e->vunpcklpd(xmmA, xmmB, anyptr_gpC);
  e->vunpcklpd(ymmA, ymmB, ymmC);
  e->vunpcklpd(ymmA, ymmB, anyptr_gpC);
  e->vunpcklps(xmmA, xmmB, xmmC);
  e->vunpcklps(xmmA, xmmB, anyptr_gpC);
  e->vunpcklps(ymmA, ymmB, ymmC);
  e->vunpcklps(ymmA, ymmB, anyptr_gpC);
  e->vxorpd(xmmA, xmmB, xmmC);
  e->vxorpd(xmmA, xmmB, anyptr_gpC);
  e->vxorpd(ymmA, ymmB, ymmC);
  e->vxorpd(ymmA, ymmB, anyptr_gpC);
  e->vxorps(xmmA, xmmB, xmmC);
  e->vxorps(xmmA, xmmB, anyptr_gpC);
  e->vxorps(ymmA, ymmB, ymmC);
  e->vxorps(ymmA, ymmB, anyptr_gpC);
  e->vzeroall();
  e->vex3().vzeroall();
  e->vzeroupper();
  e->vex3().vzeroupper();

  // AVX+AESNI.
  e->nop();

  e->vaesdec(xmmA, xmmB, xmmC);
  e->vaesdec(xmmA, xmmB, anyptr_gpC);
  e->vaesdeclast(xmmA, xmmB, xmmC);
  e->vaesdeclast(xmmA, xmmB, anyptr_gpC);
  e->vaesenc(xmmA, xmmB, xmmC);
  e->vaesenc(xmmA, xmmB, anyptr_gpC);
  e->vaesenclast(xmmA, xmmB, xmmC);
  e->vaesenclast(xmmA, xmmB, anyptr_gpC);
  e->vaesimc(xmmA, xmmB);
  e->vaesimc(xmmA, anyptr_gpB);
  e->vaeskeygenassist(xmmA, xmmB, 0);
  e->vaeskeygenassist(xmmA, anyptr_gpB, 0);

  // AVX+PCLMULQDQ.
  e->nop();

  e->vpclmulqdq(xmmA, xmmB, xmmC, 0);
  e->vpclmulqdq(xmmA, xmmB, anyptr_gpC, 0);

  // AVX2.
  e->nop();

  e->vbroadcasti128(ymmA, anyptr_gpB);
  e->vbroadcastsd(ymmA, xmmB);
  e->vbroadcastss(xmmA, xmmB);
  e->vbroadcastss(ymmA, xmmB);
  e->vextracti128(xmmA, ymmB, 0);
  e->vextracti128(anyptr_gpA, ymmB, 0);
  e->vgatherdpd(xmmA, vx_ptr, xmmC);
  e->vgatherdpd(ymmA, vx_ptr, ymmC);
  e->vgatherdps(xmmA, vx_ptr, xmmC);
  e->vgatherdps(ymmA, vy_ptr, ymmC);
  e->vgatherqpd(xmmA, vx_ptr, xmmC);
  e->vgatherqpd(ymmA, vy_ptr, ymmC);
  e->vgatherqps(xmmA, vx_ptr, xmmC);
  e->vgatherqps(xmmA, vy_ptr, xmmC);
  e->vinserti128(ymmA, ymmB, xmmC, 0);
  e->vinserti128(ymmA, ymmB, anyptr_gpC, 0);
  e->vmovntdqa(ymmA, anyptr_gpB);
  e->vmpsadbw(ymmA, ymmB, ymmC, 0);
  e->vmpsadbw(ymmA, ymmB, anyptr_gpC, 0);
  e->vpabsb(ymmA, ymmB);
  e->vpabsb(ymmA, anyptr_gpB);
  e->vpabsd(ymmA, ymmB);
  e->vpabsd(ymmA, anyptr_gpB);
  e->vpabsw(ymmA, ymmB);
  e->vpabsw(ymmA, anyptr_gpB);
  e->vpackssdw(ymmA, ymmB, ymmC);
  e->vpackssdw(ymmA, ymmB, anyptr_gpC);
  e->vpacksswb(ymmA, ymmB, ymmC);
  e->vpacksswb(ymmA, ymmB, anyptr_gpC);
  e->vpackusdw(ymmA, ymmB, ymmC);
  e->vpackusdw(ymmA, ymmB, anyptr_gpC);
  e->vpackuswb(ymmA, ymmB, ymmC);
  e->vpackuswb(ymmA, ymmB, anyptr_gpC);
  e->vpaddb(ymmA, ymmB, ymmC);
  e->vpaddb(ymmA, ymmB, anyptr_gpC);
  e->vpaddd(ymmA, ymmB, ymmC);
  e->vpaddd(ymmA, ymmB, anyptr_gpC);
  e->vpaddq(ymmA, ymmB, ymmC);
  e->vpaddq(ymmA, ymmB, anyptr_gpC);
  e->vpaddw(ymmA, ymmB, ymmC);
  e->vpaddw(ymmA, ymmB, anyptr_gpC);
  e->vpaddsb(ymmA, ymmB, ymmC);
  e->vpaddsb(ymmA, ymmB, anyptr_gpC);
  e->vpaddsw(ymmA, ymmB, ymmC);
  e->vpaddsw(ymmA, ymmB, anyptr_gpC);
  e->vpaddusb(ymmA, ymmB, ymmC);
  e->vpaddusb(ymmA, ymmB, anyptr_gpC);
  e->vpaddusw(ymmA, ymmB, ymmC);
  e->vpaddusw(ymmA, ymmB, anyptr_gpC);
  e->vpalignr(ymmA, ymmB, ymmC, 0);
  e->vpalignr(ymmA, ymmB, anyptr_gpC, 0);
  e->vpand(ymmA, ymmB, ymmC);
  e->vpand(ymmA, ymmB, anyptr_gpC);
  e->vpandn(ymmA, ymmB, ymmC);
  e->vpandn(ymmA, ymmB, anyptr_gpC);
  e->vpavgb(ymmA, ymmB, ymmC);
  e->vpavgb(ymmA, ymmB, anyptr_gpC);
  e->vpavgw(ymmA, ymmB, ymmC);
  e->vpavgw(ymmA, ymmB, anyptr_gpC);
  e->vpblendd(xmmA, xmmB, xmmC, 0);
  e->vpblendd(xmmA, xmmB, anyptr_gpC, 0);
  e->vpblendd(ymmA, ymmB, ymmC, 0);
  e->vpblendd(ymmA, ymmB, anyptr_gpC, 0);
  e->vpblendvb(ymmA, ymmB, ymmC, ymmD);
  e->vpblendvb(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vpblendw(ymmA, ymmB, ymmC, 0);
  e->vpblendw(ymmA, ymmB, anyptr_gpC, 0);
  e->vpbroadcastb(xmmA, xmmB);
  e->vpbroadcastb(xmmA, anyptr_gpB);
  e->vpbroadcastb(ymmA, xmmB);
  e->vpbroadcastb(ymmA, anyptr_gpB);
  e->vpbroadcastd(xmmA, xmmB);
  e->vpbroadcastd(xmmA, anyptr_gpB);
  e->vpbroadcastd(ymmA, xmmB);
  e->vpbroadcastd(ymmA, anyptr_gpB);
  e->vpbroadcastq(xmmA, xmmB);
  e->vpbroadcastq(xmmA, anyptr_gpB);
  e->vpbroadcastq(ymmA, xmmB);
  e->vpbroadcastq(ymmA, anyptr_gpB);
  e->vpbroadcastw(xmmA, xmmB);
  e->vpbroadcastw(xmmA, anyptr_gpB);
  e->vpbroadcastw(ymmA, xmmB);
  e->vpbroadcastw(ymmA, anyptr_gpB);
  e->vpcmpeqb(ymmA, ymmB, ymmC);
  e->vpcmpeqb(ymmA, ymmB, anyptr_gpC);
  e->vpcmpeqd(ymmA, ymmB, ymmC);
  e->vpcmpeqd(ymmA, ymmB, anyptr_gpC);
  e->vpcmpeqq(ymmA, ymmB, ymmC);
  e->vpcmpeqq(ymmA, ymmB, anyptr_gpC);
  e->vpcmpeqw(ymmA, ymmB, ymmC);
  e->vpcmpeqw(ymmA, ymmB, anyptr_gpC);
  e->vpcmpgtb(ymmA, ymmB, ymmC);
  e->vpcmpgtb(ymmA, ymmB, anyptr_gpC);
  e->vpcmpgtd(ymmA, ymmB, ymmC);
  e->vpcmpgtd(ymmA, ymmB, anyptr_gpC);
  e->vpcmpgtq(ymmA, ymmB, ymmC);
  e->vpcmpgtq(ymmA, ymmB, anyptr_gpC);
  e->vpcmpgtw(ymmA, ymmB, ymmC);
  e->vpcmpgtw(ymmA, ymmB, anyptr_gpC);
  e->vperm2i128(ymmA, ymmB, ymmC, 0);
  e->vperm2i128(ymmA, ymmB, anyptr_gpC, 0);
  e->vpermd(ymmA, ymmB, ymmC);
  e->vpermd(ymmA, ymmB, anyptr_gpC);
  e->vpermps(ymmA, ymmB, ymmC);
  e->vpermps(ymmA, ymmB, anyptr_gpC);
  e->vpermpd(ymmA, ymmB, 0);
  e->vpermpd(ymmA, anyptr_gpB, 0);
  e->vpermq(ymmA, ymmB, 0);
  e->vpermq(ymmA, anyptr_gpB, 0);
  e->vpgatherdd(xmmA, vx_ptr, xmmC);
  e->vpgatherdd(ymmA, vy_ptr, ymmC);
  e->vpgatherdq(xmmA, vx_ptr, xmmC);
  e->vpgatherdq(ymmA, vx_ptr, ymmC);
  e->vpgatherqd(xmmA, vx_ptr, xmmC);
  e->vpgatherqd(xmmA, vy_ptr, xmmC);
  e->vpgatherqq(xmmA, vx_ptr, xmmC);
  e->vpgatherqq(ymmA, vy_ptr, ymmC);
  e->vpmovmskb(gzA, ymmB);
  e->vpmovsxbd(ymmA, anyptr_gpB);
  e->vpmovsxbd(ymmA, xmmB);
  e->vpmovsxbq(ymmA, anyptr_gpB);
  e->vpmovsxbq(ymmA, xmmB);
  e->vpmovsxbw(ymmA, anyptr_gpB);
  e->vpmovsxbw(ymmA, xmmB);
  e->vpmovsxdq(ymmA, anyptr_gpB);
  e->vpmovsxdq(ymmA, xmmB);
  e->vpmovsxwd(ymmA, anyptr_gpB);
  e->vpmovsxwd(ymmA, xmmB);
  e->vpmovsxwq(ymmA, anyptr_gpB);
  e->vpmovsxwq(ymmA, xmmB);
  e->vpmovzxbd(ymmA, anyptr_gpB);
  e->vpmovzxbd(ymmA, xmmB);
  e->vpmovzxbq(ymmA, anyptr_gpB);
  e->vpmovzxbq(ymmA, xmmB);
  e->vpmovzxbw(ymmA, anyptr_gpB);
  e->vpmovzxbw(ymmA, xmmB);
  e->vpmovzxdq(ymmA, anyptr_gpB);
  e->vpmovzxdq(ymmA, xmmB);
  e->vpmovzxwd(ymmA, anyptr_gpB);
  e->vpmovzxwd(ymmA, xmmB);
  e->vpmovzxwq(ymmA, anyptr_gpB);
  e->vpmovzxwq(ymmA, xmmB);
  e->vpshufd(ymmA, anyptr_gpB, 0);
  e->vpshufd(ymmA, ymmB, 0);
  e->vpshufhw(ymmA, anyptr_gpB, 0);
  e->vpshufhw(ymmA, ymmB, 0);
  e->vpshuflw(ymmA, anyptr_gpB, 0);
  e->vpshuflw(ymmA, ymmB, 0);
  e->vpslld(ymmA, ymmB, 0);
  e->vpslldq(ymmA, ymmB, 0);
  e->vpsllq(ymmA, ymmB, 0);
  e->vpsllw(ymmA, ymmB, 0);
  e->vpsrad(ymmA, ymmB, 0);
  e->vpsraw(ymmA, ymmB, 0);
  e->vpsrld(ymmA, ymmB, 0);
  e->vpsrldq(ymmA, ymmB, 0);
  e->vpsrlq(ymmA, ymmB, 0);
  e->vpsrlw(ymmA, ymmB, 0);
  e->vphaddd(ymmA, ymmB, anyptr_gpC);
  e->vphaddd(ymmA, ymmB, ymmC);
  e->vphaddsw(ymmA, ymmB, anyptr_gpC);
  e->vphaddsw(ymmA, ymmB, ymmC);
  e->vphaddw(ymmA, ymmB, anyptr_gpC);
  e->vphaddw(ymmA, ymmB, ymmC);
  e->vphsubd(ymmA, ymmB, anyptr_gpC);
  e->vphsubd(ymmA, ymmB, ymmC);
  e->vphsubsw(ymmA, ymmB, anyptr_gpC);
  e->vphsubsw(ymmA, ymmB, ymmC);
  e->vphsubw(ymmA, ymmB, anyptr_gpC);
  e->vphsubw(ymmA, ymmB, ymmC);
  e->vpmaddubsw(ymmA, ymmB, anyptr_gpC);
  e->vpmaddubsw(ymmA, ymmB, ymmC);
  e->vpmaddwd(ymmA, ymmB, anyptr_gpC);
  e->vpmaddwd(ymmA, ymmB, ymmC);
  e->vpmaskmovd(anyptr_gpA, xmmB, xmmC);
  e->vpmaskmovd(anyptr_gpA, ymmB, ymmC);
  e->vpmaskmovd(xmmA, xmmB, anyptr_gpC);
  e->vpmaskmovd(ymmA, ymmB, anyptr_gpC);
  e->vpmaskmovq(anyptr_gpA, xmmB, xmmC);
  e->vpmaskmovq(anyptr_gpA, ymmB, ymmC);
  e->vpmaskmovq(xmmA, xmmB, anyptr_gpC);
  e->vpmaskmovq(ymmA, ymmB, anyptr_gpC);
  e->vpmaxsb(ymmA, ymmB, anyptr_gpC);
  e->vpmaxsb(ymmA, ymmB, ymmC);
  e->vpmaxsd(ymmA, ymmB, anyptr_gpC);
  e->vpmaxsd(ymmA, ymmB, ymmC);
  e->vpmaxsw(ymmA, ymmB, anyptr_gpC);
  e->vpmaxsw(ymmA, ymmB, ymmC);
  e->vpmaxub(ymmA, ymmB, anyptr_gpC);
  e->vpmaxub(ymmA, ymmB, ymmC);
  e->vpmaxud(ymmA, ymmB, anyptr_gpC);
  e->vpmaxud(ymmA, ymmB, ymmC);
  e->vpmaxuw(ymmA, ymmB, anyptr_gpC);
  e->vpmaxuw(ymmA, ymmB, ymmC);
  e->vpminsb(ymmA, ymmB, anyptr_gpC);
  e->vpminsb(ymmA, ymmB, ymmC);
  e->vpminsd(ymmA, ymmB, anyptr_gpC);
  e->vpminsd(ymmA, ymmB, ymmC);
  e->vpminsw(ymmA, ymmB, anyptr_gpC);
  e->vpminsw(ymmA, ymmB, ymmC);
  e->vpminub(ymmA, ymmB, anyptr_gpC);
  e->vpminub(ymmA, ymmB, ymmC);
  e->vpminud(ymmA, ymmB, anyptr_gpC);
  e->vpminud(ymmA, ymmB, ymmC);
  e->vpminuw(ymmA, ymmB, anyptr_gpC);
  e->vpminuw(ymmA, ymmB, ymmC);
  e->vpmuldq(ymmA, ymmB, anyptr_gpC);
  e->vpmuldq(ymmA, ymmB, ymmC);
  e->vpmulhrsw(ymmA, ymmB, anyptr_gpC);
  e->vpmulhrsw(ymmA, ymmB, ymmC);
  e->vpmulhuw(ymmA, ymmB, anyptr_gpC);
  e->vpmulhuw(ymmA, ymmB, ymmC);
  e->vpmulhw(ymmA, ymmB, anyptr_gpC);
  e->vpmulhw(ymmA, ymmB, ymmC);
  e->vpmulld(ymmA, ymmB, anyptr_gpC);
  e->vpmulld(ymmA, ymmB, ymmC);
  e->vpmullw(ymmA, ymmB, anyptr_gpC);
  e->vpmullw(ymmA, ymmB, ymmC);
  e->vpmuludq(ymmA, ymmB, anyptr_gpC);
  e->vpmuludq(ymmA, ymmB, ymmC);
  e->vpor(ymmA, ymmB, anyptr_gpC);
  e->vpor(ymmA, ymmB, ymmC);
  e->vpsadbw(ymmA, ymmB, anyptr_gpC);
  e->vpsadbw(ymmA, ymmB, ymmC);
  e->vpshufb(ymmA, ymmB, anyptr_gpC);
  e->vpshufb(ymmA, ymmB, ymmC);
  e->vpsignb(ymmA, ymmB, anyptr_gpC);
  e->vpsignb(ymmA, ymmB, ymmC);
  e->vpsignd(ymmA, ymmB, anyptr_gpC);
  e->vpsignd(ymmA, ymmB, ymmC);
  e->vpsignw(ymmA, ymmB, anyptr_gpC);
  e->vpsignw(ymmA, ymmB, ymmC);
  e->vpslld(ymmA, ymmB, anyptr_gpC);
  e->vpslld(ymmA, ymmB, xmmC);
  e->vpsllq(ymmA, ymmB, anyptr_gpC);
  e->vpsllq(ymmA, ymmB, xmmC);
  e->vpsllvd(xmmA, xmmB, anyptr_gpC);
  e->vpsllvd(xmmA, xmmB, xmmC);
  e->vpsllvd(ymmA, ymmB, anyptr_gpC);
  e->vpsllvd(ymmA, ymmB, ymmC);
  e->vpsllvq(xmmA, xmmB, anyptr_gpC);
  e->vpsllvq(xmmA, xmmB, xmmC);
  e->vpsllvq(ymmA, ymmB, anyptr_gpC);
  e->vpsllvq(ymmA, ymmB, ymmC);
  e->vpsllw(ymmA, ymmB, anyptr_gpC);
  e->vpsllw(ymmA, ymmB, xmmC);
  e->vpsrad(ymmA, ymmB, anyptr_gpC);
  e->vpsrad(ymmA, ymmB, xmmC);
  e->vpsravd(xmmA, xmmB, anyptr_gpC);
  e->vpsravd(xmmA, xmmB, xmmC);
  e->vpsravd(ymmA, ymmB, anyptr_gpC);
  e->vpsravd(ymmA, ymmB, ymmC);
  e->vpsraw(ymmA, ymmB, anyptr_gpC);
  e->vpsraw(ymmA, ymmB, xmmC);
  e->vpsrld(ymmA, ymmB, anyptr_gpC);
  e->vpsrld(ymmA, ymmB, xmmC);
  e->vpsrlq(ymmA, ymmB, anyptr_gpC);
  e->vpsrlq(ymmA, ymmB, xmmC);
  e->vpsrlvd(xmmA, xmmB, anyptr_gpC);
  e->vpsrlvd(xmmA, xmmB, xmmC);
  e->vpsrlvd(ymmA, ymmB, anyptr_gpC);
  e->vpsrlvd(ymmA, ymmB, ymmC);
  e->vpsrlvq(xmmA, xmmB, anyptr_gpC);
  e->vpsrlvq(xmmA, xmmB, xmmC);
  e->vpsrlvq(ymmA, ymmB, anyptr_gpC);
  e->vpsrlvq(ymmA, ymmB, ymmC);
  e->vpsrlw(ymmA, ymmB, anyptr_gpC);
  e->vpsrlw(ymmA, ymmB, xmmC);
  e->vpsubb(ymmA, ymmB, anyptr_gpC);
  e->vpsubb(ymmA, ymmB, ymmC);
  e->vpsubd(ymmA, ymmB, anyptr_gpC);
  e->vpsubd(ymmA, ymmB, ymmC);
  e->vpsubq(ymmA, ymmB, anyptr_gpC);
  e->vpsubq(ymmA, ymmB, ymmC);
  e->vpsubsb(ymmA, ymmB, anyptr_gpC);
  e->vpsubsb(ymmA, ymmB, ymmC);
  e->vpsubsw(ymmA, ymmB, anyptr_gpC);
  e->vpsubsw(ymmA, ymmB, ymmC);
  e->vpsubusb(ymmA, ymmB, anyptr_gpC);
  e->vpsubusb(ymmA, ymmB, ymmC);
  e->vpsubusw(ymmA, ymmB, anyptr_gpC);
  e->vpsubusw(ymmA, ymmB, ymmC);
  e->vpsubw(ymmA, ymmB, anyptr_gpC);
  e->vpsubw(ymmA, ymmB, ymmC);
  e->vpunpckhbw(ymmA, ymmB, anyptr_gpC);
  e->vpunpckhbw(ymmA, ymmB, ymmC);
  e->vpunpckhdq(ymmA, ymmB, anyptr_gpC);
  e->vpunpckhdq(ymmA, ymmB, ymmC);
  e->vpunpckhqdq(ymmA, ymmB, anyptr_gpC);
  e->vpunpckhqdq(ymmA, ymmB, ymmC);
  e->vpunpckhwd(ymmA, ymmB, anyptr_gpC);
  e->vpunpckhwd(ymmA, ymmB, ymmC);
  e->vpunpcklbw(ymmA, ymmB, anyptr_gpC);
  e->vpunpcklbw(ymmA, ymmB, ymmC);
  e->vpunpckldq(ymmA, ymmB, anyptr_gpC);
  e->vpunpckldq(ymmA, ymmB, ymmC);
  e->vpunpcklqdq(ymmA, ymmB, anyptr_gpC);
  e->vpunpcklqdq(ymmA, ymmB, ymmC);
  e->vpunpcklwd(ymmA, ymmB, anyptr_gpC);
  e->vpunpcklwd(ymmA, ymmB, ymmC);
  e->vpxor(ymmA, ymmB, anyptr_gpC);
  e->vpxor(ymmA, ymmB, ymmC);

  // FMA.
  e->nop();

  e->vfmadd132pd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd132pd(xmmA, xmmB, xmmC);
  e->vfmadd132pd(ymmA, ymmB, anyptr_gpC);
  e->vfmadd132pd(ymmA, ymmB, ymmC);
  e->vfmadd132ps(xmmA, xmmB, anyptr_gpC);
  e->vfmadd132ps(xmmA, xmmB, xmmC);
  e->vfmadd132ps(ymmA, ymmB, anyptr_gpC);
  e->vfmadd132ps(ymmA, ymmB, ymmC);
  e->vfmadd132sd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd132sd(xmmA, xmmB, xmmC);
  e->vfmadd132ss(xmmA, xmmB, anyptr_gpC);
  e->vfmadd132ss(xmmA, xmmB, xmmC);
  e->vfmadd213pd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd213pd(xmmA, xmmB, xmmC);
  e->vfmadd213pd(ymmA, ymmB, anyptr_gpC);
  e->vfmadd213pd(ymmA, ymmB, ymmC);
  e->vfmadd213ps(xmmA, xmmB, anyptr_gpC);
  e->vfmadd213ps(xmmA, xmmB, xmmC);
  e->vfmadd213ps(ymmA, ymmB, anyptr_gpC);
  e->vfmadd213ps(ymmA, ymmB, ymmC);
  e->vfmadd213sd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd213sd(xmmA, xmmB, xmmC);
  e->vfmadd213ss(xmmA, xmmB, anyptr_gpC);
  e->vfmadd213ss(xmmA, xmmB, xmmC);
  e->vfmadd231pd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd231pd(xmmA, xmmB, xmmC);
  e->vfmadd231pd(ymmA, ymmB, anyptr_gpC);
  e->vfmadd231pd(ymmA, ymmB, ymmC);
  e->vfmadd231ps(xmmA, xmmB, anyptr_gpC);
  e->vfmadd231ps(xmmA, xmmB, xmmC);
  e->vfmadd231ps(ymmA, ymmB, anyptr_gpC);
  e->vfmadd231ps(ymmA, ymmB, ymmC);
  e->vfmadd231sd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd231sd(xmmA, xmmB, xmmC);
  e->vfmadd231ss(xmmA, xmmB, anyptr_gpC);
  e->vfmadd231ss(xmmA, xmmB, xmmC);
  e->vfmaddsub132pd(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub132pd(xmmA, xmmB, xmmC);
  e->vfmaddsub132pd(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub132pd(ymmA, ymmB, ymmC);
  e->vfmaddsub132ps(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub132ps(xmmA, xmmB, xmmC);
  e->vfmaddsub132ps(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub132ps(ymmA, ymmB, ymmC);
  e->vfmaddsub213pd(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub213pd(xmmA, xmmB, xmmC);
  e->vfmaddsub213pd(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub213pd(ymmA, ymmB, ymmC);
  e->vfmaddsub213ps(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub213ps(xmmA, xmmB, xmmC);
  e->vfmaddsub213ps(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub213ps(ymmA, ymmB, ymmC);
  e->vfmaddsub231pd(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub231pd(xmmA, xmmB, xmmC);
  e->vfmaddsub231pd(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub231pd(ymmA, ymmB, ymmC);
  e->vfmaddsub231ps(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub231ps(xmmA, xmmB, xmmC);
  e->vfmaddsub231ps(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub231ps(ymmA, ymmB, ymmC);
  e->vfmsub132pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub132pd(xmmA, xmmB, xmmC);
  e->vfmsub132pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsub132pd(ymmA, ymmB, ymmC);
  e->vfmsub132ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsub132ps(xmmA, xmmB, xmmC);
  e->vfmsub132ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsub132ps(ymmA, ymmB, ymmC);
  e->vfmsub132sd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub132sd(xmmA, xmmB, xmmC);
  e->vfmsub132ss(xmmA, xmmB, anyptr_gpC);
  e->vfmsub132ss(xmmA, xmmB, xmmC);
  e->vfmsub213pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub213pd(xmmA, xmmB, xmmC);
  e->vfmsub213pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsub213pd(ymmA, ymmB, ymmC);
  e->vfmsub213ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsub213ps(xmmA, xmmB, xmmC);
  e->vfmsub213ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsub213ps(ymmA, ymmB, ymmC);
  e->vfmsub213sd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub213sd(xmmA, xmmB, xmmC);
  e->vfmsub213ss(xmmA, xmmB, anyptr_gpC);
  e->vfmsub213ss(xmmA, xmmB, xmmC);
  e->vfmsub231pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub231pd(xmmA, xmmB, xmmC);
  e->vfmsub231pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsub231pd(ymmA, ymmB, ymmC);
  e->vfmsub231ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsub231ps(xmmA, xmmB, xmmC);
  e->vfmsub231ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsub231ps(ymmA, ymmB, ymmC);
  e->vfmsub231sd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub231sd(xmmA, xmmB, xmmC);
  e->vfmsub231ss(xmmA, xmmB, anyptr_gpC);
  e->vfmsub231ss(xmmA, xmmB, xmmC);
  e->vfmsubadd132pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd132pd(xmmA, xmmB, xmmC);
  e->vfmsubadd132pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd132pd(ymmA, ymmB, ymmC);
  e->vfmsubadd132ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd132ps(xmmA, xmmB, xmmC);
  e->vfmsubadd132ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd132ps(ymmA, ymmB, ymmC);
  e->vfmsubadd213pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd213pd(xmmA, xmmB, xmmC);
  e->vfmsubadd213pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd213pd(ymmA, ymmB, ymmC);
  e->vfmsubadd213ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd213ps(xmmA, xmmB, xmmC);
  e->vfmsubadd213ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd213ps(ymmA, ymmB, ymmC);
  e->vfmsubadd231pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd231pd(xmmA, xmmB, xmmC);
  e->vfmsubadd231pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd231pd(ymmA, ymmB, ymmC);
  e->vfmsubadd231ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd231ps(xmmA, xmmB, xmmC);
  e->vfmsubadd231ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd231ps(ymmA, ymmB, ymmC);
  e->vfnmadd132pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd132pd(xmmA, xmmB, xmmC);
  e->vfnmadd132pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd132pd(ymmA, ymmB, ymmC);
  e->vfnmadd132ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd132ps(xmmA, xmmB, xmmC);
  e->vfnmadd132ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd132ps(ymmA, ymmB, ymmC);
  e->vfnmadd132sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd132sd(xmmA, xmmB, xmmC);
  e->vfnmadd132ss(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd132ss(xmmA, xmmB, xmmC);
  e->vfnmadd213pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd213pd(xmmA, xmmB, xmmC);
  e->vfnmadd213pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd213pd(ymmA, ymmB, ymmC);
  e->vfnmadd213ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd213ps(xmmA, xmmB, xmmC);
  e->vfnmadd213ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd213ps(ymmA, ymmB, ymmC);
  e->vfnmadd213sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd213sd(xmmA, xmmB, xmmC);
  e->vfnmadd213ss(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd213ss(xmmA, xmmB, xmmC);
  e->vfnmadd231pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd231pd(xmmA, xmmB, xmmC);
  e->vfnmadd231pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd231pd(ymmA, ymmB, ymmC);
  e->vfnmadd231ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd231ps(xmmA, xmmB, xmmC);
  e->vfnmadd231ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd231ps(ymmA, ymmB, ymmC);
  e->vfnmadd231sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd231sd(xmmA, xmmB, xmmC);
  e->vfnmadd231ss(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd231ss(xmmA, xmmB, xmmC);
  e->vfnmsub132pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub132pd(xmmA, xmmB, xmmC);
  e->vfnmsub132pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub132pd(ymmA, ymmB, ymmC);
  e->vfnmsub132ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub132ps(xmmA, xmmB, xmmC);
  e->vfnmsub132ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub132ps(ymmA, ymmB, ymmC);
  e->vfnmsub132sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub132sd(xmmA, xmmB, xmmC);
  e->vfnmsub132ss(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub132ss(xmmA, xmmB, xmmC);
  e->vfnmsub213pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub213pd(xmmA, xmmB, xmmC);
  e->vfnmsub213pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub213pd(ymmA, ymmB, ymmC);
  e->vfnmsub213ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub213ps(xmmA, xmmB, xmmC);
  e->vfnmsub213ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub213ps(ymmA, ymmB, ymmC);
  e->vfnmsub213sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub213sd(xmmA, xmmB, xmmC);
  e->vfnmsub213ss(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub213ss(xmmA, xmmB, xmmC);
  e->vfnmsub231pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub231pd(xmmA, xmmB, xmmC);
  e->vfnmsub231pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub231pd(ymmA, ymmB, ymmC);
  e->vfnmsub231ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub231ps(xmmA, xmmB, xmmC);
  e->vfnmsub231ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub231ps(ymmA, ymmB, ymmC);
  e->vfnmsub231sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub231sd(xmmA, xmmB, xmmC);
  e->vfnmsub231ss(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub231ss(xmmA, xmmB, xmmC);

  // FMA4.
  e->nop();

  e->vfmaddpd(xmmA, xmmB, xmmC, xmmD);
  e->vfmaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfmaddpd(ymmA, ymmB, ymmC, ymmD);
  e->vfmaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfmaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfmaddps(xmmA, xmmB, xmmC, xmmD);
  e->vfmaddps(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmaddps(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfmaddps(ymmA, ymmB, ymmC, ymmD);
  e->vfmaddps(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfmaddps(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfmaddsd(xmmA, xmmB, xmmC, xmmD);
  e->vfmaddsd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmaddsd(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfmaddss(xmmA, xmmB, xmmC, xmmD);
  e->vfmaddss(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmaddss(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfmaddsubpd(xmmA, xmmB, xmmC, xmmD);
  e->vfmaddsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmaddsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfmaddsubpd(ymmA, ymmB, ymmC, ymmD);
  e->vfmaddsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfmaddsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfmaddsubps(xmmA, xmmB, xmmC, xmmD);
  e->vfmaddsubps(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmaddsubps(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfmaddsubps(ymmA, ymmB, ymmC, ymmD);
  e->vfmaddsubps(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfmaddsubps(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfmsubaddpd(xmmA, xmmB, xmmC, xmmD);
  e->vfmsubaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmsubaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfmsubaddpd(ymmA, ymmB, ymmC, ymmD);
  e->vfmsubaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfmsubaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfmsubaddps(xmmA, xmmB, xmmC, xmmD);
  e->vfmsubaddps(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmsubaddps(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfmsubaddps(ymmA, ymmB, ymmC, ymmD);
  e->vfmsubaddps(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfmsubaddps(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfmsubpd(xmmA, xmmB, xmmC, xmmD);
  e->vfmsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfmsubpd(ymmA, ymmB, ymmC, ymmD);
  e->vfmsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfmsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfmsubps(xmmA, xmmB, xmmC, xmmD);
  e->vfmsubps(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmsubps(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfmsubps(ymmA, ymmB, ymmC, ymmD);
  e->vfmsubps(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfmsubps(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfmsubsd(xmmA, xmmB, xmmC, xmmD);
  e->vfmsubsd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmsubsd(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfmsubss(xmmA, xmmB, xmmC, xmmD);
  e->vfmsubss(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfmsubss(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfnmaddpd(xmmA, xmmB, xmmC, xmmD);
  e->vfnmaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfnmaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfnmaddpd(ymmA, ymmB, ymmC, ymmD);
  e->vfnmaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfnmaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfnmaddps(xmmA, xmmB, xmmC, xmmD);
  e->vfnmaddps(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfnmaddps(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfnmaddps(ymmA, ymmB, ymmC, ymmD);
  e->vfnmaddps(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfnmaddps(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfnmaddsd(xmmA, xmmB, xmmC, xmmD);
  e->vfnmaddsd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfnmaddsd(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfnmaddss(xmmA, xmmB, xmmC, xmmD);
  e->vfnmaddss(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfnmaddss(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfnmsubpd(xmmA, xmmB, xmmC, xmmD);
  e->vfnmsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfnmsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfnmsubpd(ymmA, ymmB, ymmC, ymmD);
  e->vfnmsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfnmsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfnmsubps(xmmA, xmmB, xmmC, xmmD);
  e->vfnmsubps(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfnmsubps(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfnmsubps(ymmA, ymmB, ymmC, ymmD);
  e->vfnmsubps(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vfnmsubps(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vfnmsubsd(xmmA, xmmB, xmmC, xmmD);
  e->vfnmsubsd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfnmsubsd(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vfnmsubss(xmmA, xmmB, xmmC, xmmD);
  e->vfnmsubss(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vfnmsubss(xmmA, xmmB, xmmC, anyptr_gpD);

  // XOP.
  e->nop();

  e->vfrczpd(xmmA, xmmB);
  e->vfrczpd(xmmA, anyptr_gpB);
  e->vfrczpd(ymmA, ymmB);
  e->vfrczpd(ymmA, anyptr_gpB);
  e->vfrczps(xmmA, xmmB);
  e->vfrczps(xmmA, anyptr_gpB);
  e->vfrczps(ymmA, ymmB);
  e->vfrczps(ymmA, anyptr_gpB);
  e->vfrczsd(xmmA, xmmB);
  e->vfrczsd(xmmA, anyptr_gpB);
  e->vfrczss(xmmA, xmmB);
  e->vfrczss(xmmA, anyptr_gpB);
  e->vpcmov(xmmA, xmmB, xmmC, xmmD);
  e->vpcmov(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpcmov(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vpcmov(ymmA, ymmB, ymmC, ymmD);
  e->vpcmov(ymmA, ymmB, anyptr_gpC, ymmD);
  e->vpcmov(ymmA, ymmB, ymmC, anyptr_gpD);
  e->vpcomb(xmmA, xmmB, xmmC, 0);
  e->vpcomb(xmmA, xmmB, anyptr_gpC, 0);
  e->vpcomd(xmmA, xmmB, xmmC, 0);
  e->vpcomd(xmmA, xmmB, anyptr_gpC, 0);
  e->vpcomq(xmmA, xmmB, xmmC, 0);
  e->vpcomq(xmmA, xmmB, anyptr_gpC, 0);
  e->vpcomw(xmmA, xmmB, xmmC, 0);
  e->vpcomw(xmmA, xmmB, anyptr_gpC, 0);
  e->vpcomub(xmmA, xmmB, xmmC, 0);
  e->vpcomub(xmmA, xmmB, anyptr_gpC, 0);
  e->vpcomud(xmmA, xmmB, xmmC, 0);
  e->vpcomud(xmmA, xmmB, anyptr_gpC, 0);
  e->vpcomuq(xmmA, xmmB, xmmC, 0);
  e->vpcomuq(xmmA, xmmB, anyptr_gpC, 0);
  e->vpcomuw(xmmA, xmmB, xmmC, 0);
  e->vpcomuw(xmmA, xmmB, anyptr_gpC, 0);
  e->vpermil2pd(xmmA, xmmB, xmmC, xmmD, 0);
  e->vpermil2pd(xmmA, xmmB, anyptr_gpC, xmmD, 0);
  e->vpermil2pd(xmmA, xmmB, xmmC, anyptr_gpD, 0);
  e->vpermil2pd(ymmA, ymmB, ymmC, ymmD, 0);
  e->vpermil2pd(ymmA, ymmB, anyptr_gpC, ymmD, 0);
  e->vpermil2pd(ymmA, ymmB, ymmC, anyptr_gpD, 0);
  e->vpermil2ps(xmmA, xmmB, xmmC, xmmD, 0);
  e->vpermil2ps(xmmA, xmmB, anyptr_gpC, xmmD, 0);
  e->vpermil2ps(xmmA, xmmB, xmmC, anyptr_gpD, 0);
  e->vpermil2ps(ymmA, ymmB, ymmC, ymmD, 0);
  e->vpermil2ps(ymmA, ymmB, anyptr_gpC, ymmD, 0);
  e->vpermil2ps(ymmA, ymmB, ymmC, anyptr_gpD, 0);
  e->vphaddbd(xmmA, xmmB);
  e->vphaddbd(xmmA, anyptr_gpB);
  e->vphaddbq(xmmA, xmmB);
  e->vphaddbq(xmmA, anyptr_gpB);
  e->vphaddbw(xmmA, xmmB);
  e->vphaddbw(xmmA, anyptr_gpB);
  e->vphadddq(xmmA, xmmB);
  e->vphadddq(xmmA, anyptr_gpB);
  e->vphaddwd(xmmA, xmmB);
  e->vphaddwd(xmmA, anyptr_gpB);
  e->vphaddwq(xmmA, xmmB);
  e->vphaddwq(xmmA, anyptr_gpB);
  e->vphaddubd(xmmA, xmmB);
  e->vphaddubd(xmmA, anyptr_gpB);
  e->vphaddubq(xmmA, xmmB);
  e->vphaddubq(xmmA, anyptr_gpB);
  e->vphaddubw(xmmA, xmmB);
  e->vphaddubw(xmmA, anyptr_gpB);
  e->vphaddudq(xmmA, xmmB);
  e->vphaddudq(xmmA, anyptr_gpB);
  e->vphadduwd(xmmA, xmmB);
  e->vphadduwd(xmmA, anyptr_gpB);
  e->vphadduwq(xmmA, xmmB);
  e->vphadduwq(xmmA, anyptr_gpB);
  e->vphsubbw(xmmA, xmmB);
  e->vphsubbw(xmmA, anyptr_gpB);
  e->vphsubdq(xmmA, xmmB);
  e->vphsubdq(xmmA, anyptr_gpB);
  e->vphsubwd(xmmA, xmmB);
  e->vphsubwd(xmmA, anyptr_gpB);
  e->vpmacsdd(xmmA, xmmB, xmmC, xmmD);
  e->vpmacsdd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpmacsdqh(xmmA, xmmB, xmmC, xmmD);
  e->vpmacsdqh(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpmacsdql(xmmA, xmmB, xmmC, xmmD);
  e->vpmacsdql(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpmacswd(xmmA, xmmB, xmmC, xmmD);
  e->vpmacswd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpmacsww(xmmA, xmmB, xmmC, xmmD);
  e->vpmacsww(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpmacssdd(xmmA, xmmB, xmmC, xmmD);
  e->vpmacssdd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpmacssdqh(xmmA, xmmB, xmmC, xmmD);
  e->vpmacssdqh(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpmacssdql(xmmA, xmmB, xmmC, xmmD);
  e->vpmacssdql(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpmacsswd(xmmA, xmmB, xmmC, xmmD);
  e->vpmacsswd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpmacssww(xmmA, xmmB, xmmC, xmmD);
  e->vpmacssww(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpmadcsswd(xmmA, xmmB, xmmC, xmmD);
  e->vpmadcsswd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpmadcswd(xmmA, xmmB, xmmC, xmmD);
  e->vpmadcswd(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpperm(xmmA, xmmB, xmmC, xmmD);
  e->vpperm(xmmA, xmmB, anyptr_gpC, xmmD);
  e->vpperm(xmmA, xmmB, xmmC, anyptr_gpD);
  e->vprotb(xmmA, xmmB, xmmC);
  e->vprotb(xmmA, anyptr_gpB, xmmC);
  e->vprotb(xmmA, xmmB, anyptr_gpC);
  e->vprotb(xmmA, xmmB, 0);
  e->vprotb(xmmA, anyptr_gpB, 0);
  e->vprotd(xmmA, xmmB, xmmC);
  e->vprotd(xmmA, anyptr_gpB, xmmC);
  e->vprotd(xmmA, xmmB, anyptr_gpC);
  e->vprotd(xmmA, xmmB, 0);
  e->vprotd(xmmA, anyptr_gpB, 0);
  e->vprotq(xmmA, xmmB, xmmC);
  e->vprotq(xmmA, anyptr_gpB, xmmC);
  e->vprotq(xmmA, xmmB, anyptr_gpC);
  e->vprotq(xmmA, xmmB, 0);
  e->vprotq(xmmA, anyptr_gpB, 0);
  e->vprotw(xmmA, xmmB, xmmC);
  e->vprotw(xmmA, anyptr_gpB, xmmC);
  e->vprotw(xmmA, xmmB, anyptr_gpC);
  e->vprotw(xmmA, xmmB, 0);
  e->vprotw(xmmA, anyptr_gpB, 0);
  e->vpshab(xmmA, xmmB, xmmC);
  e->vpshab(xmmA, anyptr_gpB, xmmC);
  e->vpshab(xmmA, xmmB, anyptr_gpC);
  e->vpshad(xmmA, xmmB, xmmC);
  e->vpshad(xmmA, anyptr_gpB, xmmC);
  e->vpshad(xmmA, xmmB, anyptr_gpC);
  e->vpshaq(xmmA, xmmB, xmmC);
  e->vpshaq(xmmA, anyptr_gpB, xmmC);
  e->vpshaq(xmmA, xmmB, anyptr_gpC);
  e->vpshaw(xmmA, xmmB, xmmC);
  e->vpshaw(xmmA, anyptr_gpB, xmmC);
  e->vpshaw(xmmA, xmmB, anyptr_gpC);
  e->vpshlb(xmmA, xmmB, xmmC);
  e->vpshlb(xmmA, anyptr_gpB, xmmC);
  e->vpshlb(xmmA, xmmB, anyptr_gpC);
  e->vpshld(xmmA, xmmB, xmmC);
  e->vpshld(xmmA, anyptr_gpB, xmmC);
  e->vpshld(xmmA, xmmB, anyptr_gpC);
  e->vpshlq(xmmA, xmmB, xmmC);
  e->vpshlq(xmmA, anyptr_gpB, xmmC);
  e->vpshlq(xmmA, xmmB, anyptr_gpC);
  e->vpshlw(xmmA, xmmB, xmmC);
  e->vpshlw(xmmA, anyptr_gpB, xmmC);
  e->vpshlw(xmmA, xmmB, anyptr_gpC);

  // F16C.
  e->nop();

  e->vcvtph2ps(xmmA, xmmB);
  e->vcvtph2ps(xmmA, anyptr_gpB);
  e->vcvtph2ps(ymmA, xmmB);
  e->vcvtph2ps(ymmA, anyptr_gpB);
  e->vcvtps2ph(xmmA, xmmB, 0);
  e->vcvtps2ph(anyptr_gpA, xmmB, 0);
  e->vcvtps2ph(xmmA, ymmB, 0);
  e->vcvtps2ph(anyptr_gpA, ymmB, 0);

  // AVX512.
  e->nop();

  e->kaddb(kA, kB, kC);
  e->kaddd(kA, kB, kC);
  e->kaddq(kA, kB, kC);
  e->kaddw(kA, kB, kC);
  e->kandb(kA, kB, kC);
  e->kandd(kA, kB, kC);
  e->kandnb(kA, kB, kC);
  e->kandnd(kA, kB, kC);
  e->kandnq(kA, kB, kC);
  e->kandnw(kA, kB, kC);
  e->kandq(kA, kB, kC);
  e->kandw(kA, kB, kC);
  e->kmovb(kA, kB);
  e->kmovb(kA, anyptr_gpB);
  e->kmovb(kA, gdB);
  if (isX64) e->kmovb(kA, gzB);
  e->kmovb(anyptr_gpA, kB);
  e->kmovb(gdA, kB);
  if (isX64) e->kmovb(gzA, kB);
  e->kmovd(kA, kB);
  e->kmovd(kA, anyptr_gpB);
  e->kmovd(kA, gdB);
  if (isX64) e->kmovd(kA, gzB);
  e->kmovd(anyptr_gpA, kB);
  e->kmovd(gdA, kB);
  if (isX64) e->kmovd(gzA, kB);
  e->kmovq(kA, kB);
  e->kmovq(kA, anyptr_gpB);
  if (isX64) e->kmovq(kA, gzB);
  e->kmovq(anyptr_gpA, kB);
  if (isX64) e->kmovq(gzA, kB);
  e->kmovw(kA, kB);
  e->kmovw(kA, anyptr_gpB);
  e->kmovw(kA, gdB);
  if (isX64) e->kmovw(kA, gzB);
  e->kmovw(anyptr_gpA, kB);
  e->kmovw(gdA, kB);
  if (isX64) e->kmovw(gzA, kB);
  e->knotb(kA, kB);
  e->knotd(kA, kB);
  e->knotq(kA, kB);
  e->knotw(kA, kB);
  e->korb(kA, kB, kC);
  e->kord(kA, kB, kC);
  e->korq(kA, kB, kC);
  e->kortestb(kA, kB);
  e->kortestd(kA, kB);
  e->kortestq(kA, kB);
  e->kortestw(kA, kB);
  e->korw(kA, kB, kC);
  e->kshiftlb(kA, kB, 0);
  e->kshiftld(kA, kB, 0);
  e->kshiftlq(kA, kB, 0);
  e->kshiftlw(kA, kB, 0);
  e->kshiftrb(kA, kB, 0);
  e->kshiftrd(kA, kB, 0);
  e->kshiftrq(kA, kB, 0);
  e->kshiftrw(kA, kB, 0);
  e->ktestb(kA, kB);
  e->ktestd(kA, kB);
  e->ktestq(kA, kB);
  e->ktestw(kA, kB);
  e->kunpckbw(kA, kB, kC);
  e->kunpckdq(kA, kB, kC);
  e->kunpckwd(kA, kB, kC);
  e->kxnorb(kA, kB, kC);
  e->kxnord(kA, kB, kC);
  e->kxnorq(kA, kB, kC);
  e->kxnorw(kA, kB, kC);
  e->kxorb(kA, kB, kC);
  e->kxord(kA, kB, kC);
  e->kxorq(kA, kB, kC);
  e->kxorw(kA, kB, kC);
  e->nop();

  e->vaddpd(xmmA, xmmB, xmmC);
  e->vaddpd(xmmA, xmmB, anyptr_gpC);
  e->vaddpd(ymmA, ymmB, ymmC);
  e->vaddpd(ymmA, ymmB, anyptr_gpC);
  e->vaddpd(zmmA, zmmB, zmmC);
  e->vaddpd(zmmA, zmmB, anyptr_gpC);
  e->vaddps(xmmA, xmmB, xmmC);
  e->vaddps(xmmA, xmmB, anyptr_gpC);
  e->vaddps(ymmA, ymmB, ymmC);
  e->vaddps(ymmA, ymmB, anyptr_gpC);
  e->vaddps(zmmA, zmmB, zmmC);
  e->vaddps(zmmA, zmmB, anyptr_gpC);
  e->vaddsd(xmmA, xmmB, xmmC);
  e->vaddsd(xmmA, xmmB, anyptr_gpC);
  e->vaddss(xmmA, xmmB, xmmC);
  e->vaddss(xmmA, xmmB, anyptr_gpC);
  e->valignd(xmmA, xmmB, xmmC, 0);
  e->valignd(xmmA, xmmB, anyptr_gpC, 0);
  e->valignd(ymmA, ymmB, ymmC, 0);
  e->valignd(ymmA, ymmB, anyptr_gpC, 0);
  e->valignd(zmmA, zmmB, zmmC, 0);
  e->valignd(zmmA, zmmB, anyptr_gpC, 0);
  e->valignq(xmmA, xmmB, xmmC, 0);
  e->valignq(xmmA, xmmB, anyptr_gpC, 0);
  e->valignq(ymmA, ymmB, ymmC, 0);
  e->valignq(ymmA, ymmB, anyptr_gpC, 0);
  e->valignq(zmmA, zmmB, zmmC, 0);
  e->valignq(zmmA, zmmB, anyptr_gpC, 0);
  e->vandnpd(xmmA, xmmB, xmmC);
  e->vandnpd(xmmA, xmmB, anyptr_gpC);
  e->vandnpd(ymmA, ymmB, ymmC);
  e->vandnpd(ymmA, ymmB, anyptr_gpC);
  e->vandnpd(zmmA, zmmB, zmmC);
  e->vandnpd(zmmA, zmmB, anyptr_gpC);
  e->vandnps(xmmA, xmmB, xmmC);
  e->vandnps(xmmA, xmmB, anyptr_gpC);
  e->vandnps(ymmA, ymmB, ymmC);
  e->vandnps(ymmA, ymmB, anyptr_gpC);
  e->vandnps(zmmA, zmmB, zmmC);
  e->vandnps(zmmA, zmmB, anyptr_gpC);
  e->vandpd(xmmA, xmmB, xmmC);
  e->vandpd(xmmA, xmmB, anyptr_gpC);
  e->vandpd(ymmA, ymmB, ymmC);
  e->vandpd(ymmA, ymmB, anyptr_gpC);
  e->vandpd(zmmA, zmmB, zmmC);
  e->vandpd(zmmA, zmmB, anyptr_gpC);
  e->vandps(xmmA, xmmB, xmmC);
  e->vandps(xmmA, xmmB, anyptr_gpC);
  e->vandps(ymmA, ymmB, ymmC);
  e->vandps(ymmA, ymmB, anyptr_gpC);
  e->vandps(zmmA, zmmB, zmmC);
  e->vandps(zmmA, zmmB, anyptr_gpC);
  e->vblendmb(xmmA, xmmB, xmmC);
  e->vblendmb(xmmA, xmmB, anyptr_gpC);
  e->vblendmb(ymmA, ymmB, ymmC);
  e->vblendmb(ymmA, ymmB, anyptr_gpC);
  e->vblendmb(zmmA, zmmB, zmmC);
  e->vblendmb(zmmA, zmmB, anyptr_gpC);
  e->vblendmd(xmmA, xmmB, xmmC);
  e->vblendmd(xmmA, xmmB, anyptr_gpC);
  e->vblendmd(ymmA, ymmB, ymmC);
  e->vblendmd(ymmA, ymmB, anyptr_gpC);
  e->vblendmd(zmmA, zmmB, zmmC);
  e->vblendmd(zmmA, zmmB, anyptr_gpC);
  e->vblendmpd(xmmA, xmmB, xmmC);
  e->vblendmpd(xmmA, xmmB, anyptr_gpC);
  e->vblendmpd(ymmA, ymmB, ymmC);
  e->vblendmpd(ymmA, ymmB, anyptr_gpC);
  e->vblendmpd(zmmA, zmmB, zmmC);
  e->vblendmpd(zmmA, zmmB, anyptr_gpC);
  e->vblendmps(xmmA, xmmB, xmmC);
  e->vblendmps(xmmA, xmmB, anyptr_gpC);
  e->vblendmps(ymmA, ymmB, ymmC);
  e->vblendmps(ymmA, ymmB, anyptr_gpC);
  e->vblendmps(zmmA, zmmB, zmmC);
  e->vblendmps(zmmA, zmmB, anyptr_gpC);
  e->vblendmq(xmmA, xmmB, xmmC);
  e->vblendmq(xmmA, xmmB, anyptr_gpC);
  e->vblendmq(ymmA, ymmB, ymmC);
  e->vblendmq(ymmA, ymmB, anyptr_gpC);
  e->vblendmq(zmmA, zmmB, zmmC);
  e->vblendmq(zmmA, zmmB, anyptr_gpC);
  e->vblendmw(xmmA, xmmB, xmmC);
  e->vblendmw(xmmA, xmmB, anyptr_gpC);
  e->vblendmw(ymmA, ymmB, ymmC);
  e->vblendmw(ymmA, ymmB, anyptr_gpC);
  e->vblendmw(zmmA, zmmB, zmmC);
  e->vblendmw(zmmA, zmmB, anyptr_gpC);
  e->vbroadcastf32x2(ymmA, xmmB);
  e->vbroadcastf32x2(ymmA, anyptr_gpB);
  e->vbroadcastf32x2(zmmA, xmmB);
  e->vbroadcastf32x2(zmmA, anyptr_gpB);
  e->vbroadcastf32x4(ymmA, anyptr_gpB);
  e->vbroadcastf32x4(zmmA, anyptr_gpB);
  e->vbroadcastf32x8(zmmA, anyptr_gpB);
  e->vbroadcastf64x2(ymmA, anyptr_gpB);
  e->vbroadcastf64x2(zmmA, anyptr_gpB);
  e->vbroadcastf64x4(zmmA, anyptr_gpB);
  e->vbroadcasti32x2(xmmA, xmmB);
  e->vbroadcasti32x2(xmmA, anyptr_gpB);
  e->vbroadcasti32x2(ymmA, xmmB);
  e->vbroadcasti32x2(ymmA, anyptr_gpB);
  e->vbroadcasti32x2(zmmA, xmmB);
  e->vbroadcasti32x2(zmmA, anyptr_gpB);
  e->vbroadcasti32x4(ymmA, anyptr_gpB);
  e->vbroadcasti32x4(zmmA, anyptr_gpB);
  e->vbroadcasti32x8(zmmA, anyptr_gpB);
  e->vbroadcasti64x2(ymmA, anyptr_gpB);
  e->vbroadcasti64x2(zmmA, anyptr_gpB);
  e->vbroadcasti64x4(zmmA, anyptr_gpB);
  e->vbroadcastsd(ymmA, xmmB);
  e->vbroadcastsd(ymmA, anyptr_gpB);
  e->vbroadcastsd(zmmA, xmmB);
  e->vbroadcastsd(zmmA, anyptr_gpB);
  e->vbroadcastss(xmmA, xmmB);
  e->vbroadcastss(xmmA, anyptr_gpB);
  e->vbroadcastss(ymmA, xmmB);
  e->vbroadcastss(ymmA, anyptr_gpB);
  e->vbroadcastss(zmmA, xmmB);
  e->vbroadcastss(zmmA, anyptr_gpB);
  e->vcmppd(kA, xmmB, xmmC, 0);
  e->vcmppd(kA, xmmB, anyptr_gpC, 0);
  e->vcmppd(kA, ymmB, ymmC, 0);
  e->vcmppd(kA, ymmB, anyptr_gpC, 0);
  e->vcmppd(kA, zmmB, zmmC, 0);
  e->vcmppd(kA, zmmB, anyptr_gpC, 0);
  e->vcmpps(kA, xmmB, xmmC, 0);
  e->vcmpps(kA, xmmB, anyptr_gpC, 0);
  e->vcmpps(kA, ymmB, ymmC, 0);
  e->vcmpps(kA, ymmB, anyptr_gpC, 0);
  e->vcmpps(kA, zmmB, zmmC, 0);
  e->vcmpps(kA, zmmB, anyptr_gpC, 0);
  e->vcmpsd(kA, xmmB, xmmC, 0);
  e->vcmpsd(kA, xmmB, anyptr_gpC, 0);
  e->vcmpss(kA, xmmB, xmmC, 0);
  e->vcmpss(kA, xmmB, anyptr_gpC, 0);
  e->vcomisd(xmmA, xmmB);
  e->vcomisd(xmmA, anyptr_gpB);
  e->vcomiss(xmmA, xmmB);
  e->vcomiss(xmmA, anyptr_gpB);
  e->vcompresspd(xmmA, xmmB);
  e->vcompresspd(anyptr_gpA, xmmB);
  e->vcompresspd(ymmA, ymmB);
  e->vcompresspd(anyptr_gpA, ymmB);
  e->vcompresspd(zmmA, zmmB);
  e->vcompresspd(anyptr_gpA, zmmB);
  e->vcompressps(xmmA, xmmB);
  e->vcompressps(anyptr_gpA, xmmB);
  e->vcompressps(ymmA, ymmB);
  e->vcompressps(anyptr_gpA, ymmB);
  e->vcompressps(zmmA, zmmB);
  e->vcompressps(anyptr_gpA, zmmB);
  e->vcvtdq2pd(xmmA, xmmB);
  e->vcvtdq2pd(xmmA, anyptr_gpB);
  e->vcvtdq2pd(ymmA, xmmB);
  e->vcvtdq2pd(ymmA, anyptr_gpB);
  e->vcvtdq2pd(zmmA, ymmB);
  e->vcvtdq2pd(zmmA, anyptr_gpB);
  e->vcvtdq2ps(xmmA, xmmB);
  e->vcvtdq2ps(xmmA, anyptr_gpB);
  e->vcvtdq2ps(ymmA, ymmB);
  e->vcvtdq2ps(ymmA, anyptr_gpB);
  e->vcvtdq2ps(zmmA, zmmB);
  e->vcvtdq2ps(zmmA, anyptr_gpB);
  e->vcvtpd2dq(xmmA, xmmB);
  e->vcvtpd2dq(xmmA, anyptr_gpB);
  e->vcvtpd2dq(xmmA, ymmB);
  e->vcvtpd2dq(xmmA, anyptr_gpB);
  e->vcvtpd2dq(ymmA, zmmB);
  e->vcvtpd2dq(ymmA, anyptr_gpB);
  e->vcvtpd2qq(xmmA, xmmB);
  e->vcvtpd2qq(xmmA, anyptr_gpB);
  e->vcvtpd2qq(ymmA, ymmB);
  e->vcvtpd2qq(ymmA, anyptr_gpB);
  e->vcvtpd2qq(zmmA, zmmB);
  e->vcvtpd2qq(zmmA, anyptr_gpB);
  e->vcvtpd2udq(xmmA, xmmB);
  e->vcvtpd2udq(xmmA, anyptr_gpB);
  e->vcvtpd2udq(xmmA, ymmB);
  e->vcvtpd2udq(xmmA, anyptr_gpB);
  e->vcvtpd2udq(ymmA, zmmB);
  e->vcvtpd2udq(ymmA, anyptr_gpB);
  e->vcvtpd2uqq(xmmA, xmmB);
  e->vcvtpd2uqq(xmmA, anyptr_gpB);
  e->vcvtpd2uqq(ymmA, ymmB);
  e->vcvtpd2uqq(ymmA, anyptr_gpB);
  e->vcvtpd2uqq(zmmA, zmmB);
  e->vcvtpd2uqq(zmmA, anyptr_gpB);
  e->vcvtph2ps(xmmA, xmmB);
  e->vcvtph2ps(xmmA, anyptr_gpB);
  e->vcvtph2ps(ymmA, xmmB);
  e->vcvtph2ps(ymmA, anyptr_gpB);
  e->vcvtph2ps(zmmA, ymmB);
  e->vcvtph2ps(zmmA, anyptr_gpB);
  e->vcvtps2dq(xmmA, xmmB);
  e->vcvtps2dq(xmmA, anyptr_gpB);
  e->vcvtps2dq(ymmA, ymmB);
  e->vcvtps2dq(ymmA, anyptr_gpB);
  e->vcvtps2dq(zmmA, zmmB);
  e->vcvtps2dq(zmmA, anyptr_gpB);
  e->vcvtps2pd(xmmA, xmmB);
  e->vcvtps2pd(xmmA, anyptr_gpB);
  e->vcvtps2pd(ymmA, xmmB);
  e->vcvtps2pd(ymmA, anyptr_gpB);
  e->vcvtps2pd(zmmA, ymmB);
  e->vcvtps2pd(zmmA, anyptr_gpB);
  e->vcvtps2ph(xmmA, xmmB, 0);
  e->vcvtps2ph(anyptr_gpA, xmmB, 0);
  e->vcvtps2ph(xmmA, ymmB, 0);
  e->vcvtps2ph(anyptr_gpA, ymmB, 0);
  e->vcvtps2ph(ymmA, zmmB, 0);
  e->vcvtps2ph(anyptr_gpA, zmmB, 0);
  e->vcvtps2qq(xmmA, xmmB);
  e->vcvtps2qq(xmmA, anyptr_gpB);
  e->vcvtps2qq(ymmA, xmmB);
  e->vcvtps2qq(ymmA, anyptr_gpB);
  e->vcvtps2qq(zmmA, ymmB);
  e->vcvtps2qq(zmmA, anyptr_gpB);
  e->vcvtps2udq(xmmA, xmmB);
  e->vcvtps2udq(xmmA, anyptr_gpB);
  e->vcvtps2udq(ymmA, ymmB);
  e->vcvtps2udq(ymmA, anyptr_gpB);
  e->vcvtps2udq(zmmA, zmmB);
  e->vcvtps2udq(zmmA, anyptr_gpB);
  e->vcvtps2uqq(xmmA, xmmB);
  e->vcvtps2uqq(xmmA, anyptr_gpB);
  e->vcvtps2uqq(ymmA, xmmB);
  e->vcvtps2uqq(ymmA, anyptr_gpB);
  e->vcvtps2uqq(zmmA, ymmB);
  e->vcvtps2uqq(zmmA, anyptr_gpB);
  e->vcvtqq2pd(xmmA, xmmB);
  e->vcvtqq2pd(xmmA, anyptr_gpB);
  e->vcvtqq2pd(ymmA, ymmB);
  e->vcvtqq2pd(ymmA, anyptr_gpB);
  e->vcvtqq2pd(zmmA, zmmB);
  e->vcvtqq2pd(zmmA, anyptr_gpB);
  e->vcvtqq2ps(xmmA, xmmB);
  e->vcvtqq2ps(xmmA, anyptr_gpB);
  e->vcvtqq2ps(xmmA, ymmB);
  e->vcvtqq2ps(xmmA, anyptr_gpB);
  e->vcvtqq2ps(ymmA, zmmB);
  e->vcvtqq2ps(ymmA, anyptr_gpB);
  e->vcvtsd2si(gdA, xmmB);
  e->vcvtsd2si(gdA, anyptr_gpB);
  if (isX64) e->vcvtsd2si(gzA, xmmB);
  if (isX64) e->vcvtsd2si(gzA, anyptr_gpB);
  e->vcvtsd2ss(xmmA, xmmB, xmmC);
  e->vcvtsd2ss(xmmA, xmmB, anyptr_gpC);
  e->vcvtsd2usi(gdA, xmmB);
  e->vcvtsd2usi(gdA, anyptr_gpB);
  if (isX64) e->vcvtsd2usi(gzA, xmmB);
  if (isX64) e->vcvtsd2usi(gzA, anyptr_gpB);
  e->vcvtsi2sd(xmmA, xmmB, gdC);
  e->vcvtsi2sd(xmmA, xmmB, dword_ptr(gzC));
  if (isX64) e->vcvtsi2sd(xmmA, xmmB, gzC);
  if (isX64) e->vcvtsi2sd(xmmA, xmmB, qword_ptr(gzC));
  e->vcvtsi2ss(xmmA, xmmB, gdC);
  e->vcvtsi2ss(xmmA, xmmB, dword_ptr(gzC));
  if (isX64) e->vcvtsi2ss(xmmA, xmmB, gzC);
  if (isX64) e->vcvtsi2ss(xmmA, xmmB, qword_ptr(gzC));
  e->vcvtss2sd(xmmA, xmmB, xmmC);
  e->vcvtss2sd(xmmA, xmmB, anyptr_gpC);
  e->vcvtss2si(gdA, xmmB);
  e->vcvtss2si(gdA, anyptr_gpB);
  if (isX64) e->vcvtss2si(gzA, xmmB);
  if (isX64) e->vcvtss2si(gzA, anyptr_gpB);
  e->vcvtss2usi(gdA, xmmB);
  e->vcvtss2usi(gdA, anyptr_gpB);
  if (isX64) e->vcvtss2usi(gzA, xmmB);
  if (isX64) e->vcvtss2usi(gzA, anyptr_gpB);
  e->vcvttpd2dq(xmmA, xmmB);
  e->vcvttpd2dq(xmmA, anyptr_gpB);
  e->vcvttpd2dq(xmmA, ymmB);
  e->vcvttpd2dq(xmmA, anyptr_gpB);
  e->vcvttpd2dq(ymmA, zmmB);
  e->vcvttpd2dq(ymmA, anyptr_gpB);
  e->vcvttpd2qq(xmmA, xmmB);
  e->vcvttpd2qq(xmmA, anyptr_gpB);
  e->vcvttpd2qq(ymmA, ymmB);
  e->vcvttpd2qq(ymmA, anyptr_gpB);
  e->vcvttpd2qq(zmmA, zmmB);
  e->vcvttpd2qq(zmmA, anyptr_gpB);
  e->vcvttpd2udq(xmmA, xmmB);
  e->vcvttpd2udq(xmmA, anyptr_gpB);
  e->vcvttpd2udq(xmmA, ymmB);
  e->vcvttpd2udq(xmmA, anyptr_gpB);
  e->vcvttpd2udq(ymmA, zmmB);
  e->vcvttpd2udq(ymmA, anyptr_gpB);
  e->vcvttpd2uqq(xmmA, xmmB);
  e->vcvttpd2uqq(xmmA, anyptr_gpB);
  e->vcvttpd2uqq(ymmA, ymmB);
  e->vcvttpd2uqq(ymmA, anyptr_gpB);
  e->vcvttpd2uqq(zmmA, zmmB);
  e->vcvttpd2uqq(zmmA, anyptr_gpB);
  e->vcvttps2dq(xmmA, xmmB);
  e->vcvttps2dq(xmmA, anyptr_gpB);
  e->vcvttps2dq(ymmA, ymmB);
  e->vcvttps2dq(ymmA, anyptr_gpB);
  e->vcvttps2dq(zmmA, zmmB);
  e->vcvttps2dq(zmmA, anyptr_gpB);
  e->vcvttps2qq(xmmA, xmmB);
  e->vcvttps2qq(xmmA, anyptr_gpB);
  e->vcvttps2qq(ymmA, xmmB);
  e->vcvttps2qq(ymmA, anyptr_gpB);
  e->vcvttps2qq(zmmA, ymmB);
  e->vcvttps2qq(zmmA, anyptr_gpB);
  e->vcvttps2udq(xmmA, xmmB);
  e->vcvttps2udq(xmmA, anyptr_gpB);
  e->vcvttps2udq(ymmA, ymmB);
  e->vcvttps2udq(ymmA, anyptr_gpB);
  e->vcvttps2udq(zmmA, zmmB);
  e->vcvttps2udq(zmmA, anyptr_gpB);
  e->vcvttps2uqq(xmmA, xmmB);
  e->vcvttps2uqq(xmmA, anyptr_gpB);
  e->vcvttps2uqq(ymmA, xmmB);
  e->vcvttps2uqq(ymmA, anyptr_gpB);
  e->vcvttps2uqq(zmmA, ymmB);
  e->vcvttps2uqq(zmmA, anyptr_gpB);
  e->vcvttsd2si(gdA, xmmB);
  e->vcvttsd2si(gdA, anyptr_gpB);
  if (isX64) e->vcvttsd2si(gzA, xmmB);
  if (isX64) e->vcvttsd2si(gzA, anyptr_gpB);
  e->vcvttsd2usi(gdA, xmmB);
  e->vcvttsd2usi(gdA, anyptr_gpB);
  if (isX64) e->vcvttsd2usi(gzA, xmmB);
  if (isX64) e->vcvttsd2usi(gzA, anyptr_gpB);
  e->vcvttss2si(gdA, xmmB);
  e->vcvttss2si(gdA, anyptr_gpB);
  if (isX64) e->vcvttss2si(gzA, xmmB);
  if (isX64) e->vcvttss2si(gzA, anyptr_gpB);
  e->vcvttss2usi(gdA, xmmB);
  e->vcvttss2usi(gdA, anyptr_gpB);
  if (isX64) e->vcvttss2usi(gzA, xmmB);
  if (isX64) e->vcvttss2usi(gzA, anyptr_gpB);
  e->vcvtudq2pd(xmmA, xmmB);
  e->vcvtudq2pd(xmmA, anyptr_gpB);
  e->vcvtudq2pd(ymmA, xmmB);
  e->vcvtudq2pd(ymmA, anyptr_gpB);
  e->vcvtudq2pd(zmmA, ymmB);
  e->vcvtudq2pd(zmmA, anyptr_gpB);
  e->vcvtudq2ps(xmmA, xmmB);
  e->vcvtudq2ps(xmmA, anyptr_gpB);
  e->vcvtudq2ps(ymmA, ymmB);
  e->vcvtudq2ps(ymmA, anyptr_gpB);
  e->vcvtudq2ps(zmmA, zmmB);
  e->vcvtudq2ps(zmmA, anyptr_gpB);
  e->vcvtuqq2pd(xmmA, xmmB);
  e->vcvtuqq2pd(xmmA, anyptr_gpB);
  e->vcvtuqq2pd(ymmA, ymmB);
  e->vcvtuqq2pd(ymmA, anyptr_gpB);
  e->vcvtuqq2pd(zmmA, zmmB);
  e->vcvtuqq2pd(zmmA, anyptr_gpB);
  e->vcvtuqq2ps(xmmA, xmmB);
  e->vcvtuqq2ps(xmmA, anyptr_gpB);
  e->vcvtuqq2ps(xmmA, ymmB);
  e->vcvtuqq2ps(xmmA, anyptr_gpB);
  e->vcvtuqq2ps(ymmA, zmmB);
  e->vcvtuqq2ps(ymmA, anyptr_gpB);
  e->vcvtusi2sd(xmmA, xmmB, gdC);
  e->vcvtusi2sd(xmmA, xmmB, dword_ptr(gzC));
  if (isX64) e->vcvtusi2sd(xmmA, xmmB, gzC);
  if (isX64) e->vcvtusi2sd(xmmA, xmmB, qword_ptr(gzC));
  e->vcvtusi2ss(xmmA, xmmB, gdC);
  e->vcvtusi2ss(xmmA, xmmB, dword_ptr(gzC));
  if (isX64) e->vcvtusi2ss(xmmA, xmmB, gzC);
  if (isX64) e->vcvtusi2ss(xmmA, xmmB, qword_ptr(gzC));
  e->vdbpsadbw(xmmA, xmmB, xmmC, 0);
  e->vdbpsadbw(xmmA, xmmB, anyptr_gpC, 0);
  e->vdbpsadbw(ymmA, ymmB, ymmC, 0);
  e->vdbpsadbw(ymmA, ymmB, anyptr_gpC, 0);
  e->vdbpsadbw(zmmA, zmmB, zmmC, 0);
  e->vdbpsadbw(zmmA, zmmB, anyptr_gpC, 0);
  e->vdivpd(xmmA, xmmB, xmmC);
  e->vdivpd(xmmA, xmmB, anyptr_gpC);
  e->vdivpd(ymmA, ymmB, ymmC);
  e->vdivpd(ymmA, ymmB, anyptr_gpC);
  e->vdivpd(zmmA, zmmB, zmmC);
  e->vdivpd(zmmA, zmmB, anyptr_gpC);
  e->vdivps(xmmA, xmmB, xmmC);
  e->vdivps(xmmA, xmmB, anyptr_gpC);
  e->vdivps(ymmA, ymmB, ymmC);
  e->vdivps(ymmA, ymmB, anyptr_gpC);
  e->vdivps(zmmA, zmmB, zmmC);
  e->vdivps(zmmA, zmmB, anyptr_gpC);
  e->vdivsd(xmmA, xmmB, xmmC);
  e->vdivsd(xmmA, xmmB, anyptr_gpC);
  e->vdivss(xmmA, xmmB, xmmC);
  e->vdivss(xmmA, xmmB, anyptr_gpC);
  e->vexp2pd(zmmA, zmmB);
  e->vexp2pd(zmmA, anyptr_gpB);
  e->vexp2ps(zmmA, zmmB);
  e->vexp2ps(zmmA, anyptr_gpB);
  e->vexpandpd(xmmA, xmmB);
  e->vexpandpd(xmmA, anyptr_gpB);
  e->vexpandpd(ymmA, ymmB);
  e->vexpandpd(ymmA, anyptr_gpB);
  e->vexpandpd(zmmA, zmmB);
  e->vexpandpd(zmmA, anyptr_gpB);
  e->vexpandps(xmmA, xmmB);
  e->vexpandps(xmmA, anyptr_gpB);
  e->vexpandps(ymmA, ymmB);
  e->vexpandps(ymmA, anyptr_gpB);
  e->vexpandps(zmmA, zmmB);
  e->vexpandps(zmmA, anyptr_gpB);
  e->vextractf32x4(xmmA, ymmB, 0);
  e->vextractf32x4(anyptr_gpA, ymmB, 0);
  e->vextractf32x4(xmmA, zmmB, 0);
  e->vextractf32x4(anyptr_gpA, zmmB, 0);
  e->vextractf32x8(ymmA, zmmB, 0);
  e->vextractf32x8(anyptr_gpA, zmmB, 0);
  e->vextractf64x2(xmmA, ymmB, 0);
  e->vextractf64x2(anyptr_gpA, ymmB, 0);
  e->vextractf64x2(xmmA, zmmB, 0);
  e->vextractf64x2(anyptr_gpA, zmmB, 0);
  e->vextractf64x4(ymmA, zmmB, 0);
  e->vextractf64x4(anyptr_gpA, zmmB, 0);
  e->vextracti32x4(xmmA, ymmB, 0);
  e->vextracti32x4(anyptr_gpA, ymmB, 0);
  e->vextracti32x4(xmmA, zmmB, 0);
  e->vextracti32x4(anyptr_gpA, zmmB, 0);
  e->vextracti32x8(ymmA, zmmB, 0);
  e->vextracti32x8(anyptr_gpA, zmmB, 0);
  e->vextracti64x2(xmmA, ymmB, 0);
  e->vextracti64x2(anyptr_gpA, ymmB, 0);
  e->vextracti64x2(xmmA, zmmB, 0);
  e->vextracti64x2(anyptr_gpA, zmmB, 0);
  e->vextracti64x4(ymmA, zmmB, 0);
  e->vextracti64x4(anyptr_gpA, zmmB, 0);
  e->vextractps(gdA, xmmB, 0);
  e->vextractps(gzA, xmmB, 0);
  e->vextractps(anyptr_gpA, xmmB, 0);
  e->vfixupimmpd(xmmA, xmmB, xmmC, 0);
  e->vfixupimmpd(xmmA, xmmB, anyptr_gpC, 0);
  e->vfixupimmpd(ymmA, ymmB, ymmC, 0);
  e->vfixupimmpd(ymmA, ymmB, anyptr_gpC, 0);
  e->vfixupimmpd(zmmA, zmmB, zmmC, 0);
  e->vfixupimmpd(zmmA, zmmB, anyptr_gpC, 0);
  e->vfixupimmps(xmmA, xmmB, xmmC, 0);
  e->vfixupimmps(xmmA, xmmB, anyptr_gpC, 0);
  e->vfixupimmps(ymmA, ymmB, ymmC, 0);
  e->vfixupimmps(ymmA, ymmB, anyptr_gpC, 0);
  e->vfixupimmps(zmmA, zmmB, zmmC, 0);
  e->vfixupimmps(zmmA, zmmB, anyptr_gpC, 0);
  e->vfixupimmsd(xmmA, xmmB, xmmC, 0);
  e->vfixupimmsd(xmmA, xmmB, anyptr_gpC, 0);
  e->vfixupimmss(xmmA, xmmB, xmmC, 0);
  e->vfixupimmss(xmmA, xmmB, anyptr_gpC, 0);
  e->vfmadd132pd(xmmA, xmmB, xmmC);
  e->vfmadd132pd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd132pd(ymmA, ymmB, ymmC);
  e->vfmadd132pd(ymmA, ymmB, anyptr_gpC);
  e->vfmadd132pd(zmmA, zmmB, zmmC);
  e->vfmadd132pd(zmmA, zmmB, anyptr_gpC);
  e->vfmadd132ps(xmmA, xmmB, xmmC);
  e->vfmadd132ps(xmmA, xmmB, anyptr_gpC);
  e->vfmadd132ps(ymmA, ymmB, ymmC);
  e->vfmadd132ps(ymmA, ymmB, anyptr_gpC);
  e->vfmadd132ps(zmmA, zmmB, zmmC);
  e->vfmadd132ps(zmmA, zmmB, anyptr_gpC);
  e->vfmadd132sd(xmmA, xmmB, xmmC);
  e->vfmadd132sd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd132ss(xmmA, xmmB, xmmC);
  e->vfmadd132ss(xmmA, xmmB, anyptr_gpC);
  e->vfmadd213pd(xmmA, xmmB, xmmC);
  e->vfmadd213pd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd213pd(ymmA, ymmB, ymmC);
  e->vfmadd213pd(ymmA, ymmB, anyptr_gpC);
  e->vfmadd213pd(zmmA, zmmB, zmmC);
  e->vfmadd213pd(zmmA, zmmB, anyptr_gpC);
  e->vfmadd213ps(xmmA, xmmB, xmmC);
  e->vfmadd213ps(xmmA, xmmB, anyptr_gpC);
  e->vfmadd213ps(ymmA, ymmB, ymmC);
  e->vfmadd213ps(ymmA, ymmB, anyptr_gpC);
  e->vfmadd213ps(zmmA, zmmB, zmmC);
  e->vfmadd213ps(zmmA, zmmB, anyptr_gpC);
  e->vfmadd213sd(xmmA, xmmB, xmmC);
  e->vfmadd213sd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd213ss(xmmA, xmmB, xmmC);
  e->vfmadd213ss(xmmA, xmmB, anyptr_gpC);
  e->vfmadd231pd(xmmA, xmmB, xmmC);
  e->vfmadd231pd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd231pd(ymmA, ymmB, ymmC);
  e->vfmadd231pd(ymmA, ymmB, anyptr_gpC);
  e->vfmadd231pd(zmmA, zmmB, zmmC);
  e->vfmadd231pd(zmmA, zmmB, anyptr_gpC);
  e->vfmadd231ps(xmmA, xmmB, xmmC);
  e->vfmadd231ps(xmmA, xmmB, anyptr_gpC);
  e->vfmadd231ps(ymmA, ymmB, ymmC);
  e->vfmadd231ps(ymmA, ymmB, anyptr_gpC);
  e->vfmadd231ps(zmmA, zmmB, zmmC);
  e->vfmadd231ps(zmmA, zmmB, anyptr_gpC);
  e->vfmadd231sd(xmmA, xmmB, xmmC);
  e->vfmadd231sd(xmmA, xmmB, anyptr_gpC);
  e->vfmadd231ss(xmmA, xmmB, xmmC);
  e->vfmadd231ss(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub132pd(xmmA, xmmB, xmmC);
  e->vfmaddsub132pd(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub132pd(ymmA, ymmB, ymmC);
  e->vfmaddsub132pd(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub132pd(zmmA, zmmB, zmmC);
  e->vfmaddsub132pd(zmmA, zmmB, anyptr_gpC);
  e->vfmaddsub132ps(xmmA, xmmB, xmmC);
  e->vfmaddsub132ps(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub132ps(ymmA, ymmB, ymmC);
  e->vfmaddsub132ps(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub132ps(zmmA, zmmB, zmmC);
  e->vfmaddsub132ps(zmmA, zmmB, anyptr_gpC);
  e->vfmaddsub213pd(xmmA, xmmB, xmmC);
  e->vfmaddsub213pd(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub213pd(ymmA, ymmB, ymmC);
  e->vfmaddsub213pd(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub213pd(zmmA, zmmB, zmmC);
  e->vfmaddsub213pd(zmmA, zmmB, anyptr_gpC);
  e->vfmaddsub213ps(xmmA, xmmB, xmmC);
  e->vfmaddsub213ps(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub213ps(ymmA, ymmB, ymmC);
  e->vfmaddsub213ps(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub213ps(zmmA, zmmB, zmmC);
  e->vfmaddsub213ps(zmmA, zmmB, anyptr_gpC);
  e->vfmaddsub231pd(xmmA, xmmB, xmmC);
  e->vfmaddsub231pd(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub231pd(ymmA, ymmB, ymmC);
  e->vfmaddsub231pd(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub231pd(zmmA, zmmB, zmmC);
  e->vfmaddsub231pd(zmmA, zmmB, anyptr_gpC);
  e->vfmaddsub231ps(xmmA, xmmB, xmmC);
  e->vfmaddsub231ps(xmmA, xmmB, anyptr_gpC);
  e->vfmaddsub231ps(ymmA, ymmB, ymmC);
  e->vfmaddsub231ps(ymmA, ymmB, anyptr_gpC);
  e->vfmaddsub231ps(zmmA, zmmB, zmmC);
  e->vfmaddsub231ps(zmmA, zmmB, anyptr_gpC);
  e->vfmsub132pd(xmmA, xmmB, xmmC);
  e->vfmsub132pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub132pd(ymmA, ymmB, ymmC);
  e->vfmsub132pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsub132pd(zmmA, zmmB, zmmC);
  e->vfmsub132pd(zmmA, zmmB, anyptr_gpC);
  e->vfmsub132ps(xmmA, xmmB, xmmC);
  e->vfmsub132ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsub132ps(ymmA, ymmB, ymmC);
  e->vfmsub132ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsub132ps(zmmA, zmmB, zmmC);
  e->vfmsub132ps(zmmA, zmmB, anyptr_gpC);
  e->vfmsub132sd(xmmA, xmmB, xmmC);
  e->vfmsub132sd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub132ss(xmmA, xmmB, xmmC);
  e->vfmsub132ss(xmmA, xmmB, anyptr_gpC);
  e->vfmsub213pd(xmmA, xmmB, xmmC);
  e->vfmsub213pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub213pd(ymmA, ymmB, ymmC);
  e->vfmsub213pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsub213pd(zmmA, zmmB, zmmC);
  e->vfmsub213pd(zmmA, zmmB, anyptr_gpC);
  e->vfmsub213ps(xmmA, xmmB, xmmC);
  e->vfmsub213ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsub213ps(ymmA, ymmB, ymmC);
  e->vfmsub213ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsub213ps(zmmA, zmmB, zmmC);
  e->vfmsub213ps(zmmA, zmmB, anyptr_gpC);
  e->vfmsub213sd(xmmA, xmmB, xmmC);
  e->vfmsub213sd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub213ss(xmmA, xmmB, xmmC);
  e->vfmsub213ss(xmmA, xmmB, anyptr_gpC);
  e->vfmsub231pd(xmmA, xmmB, xmmC);
  e->vfmsub231pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub231pd(ymmA, ymmB, ymmC);
  e->vfmsub231pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsub231pd(zmmA, zmmB, zmmC);
  e->vfmsub231pd(zmmA, zmmB, anyptr_gpC);
  e->vfmsub231ps(xmmA, xmmB, xmmC);
  e->vfmsub231ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsub231ps(ymmA, ymmB, ymmC);
  e->vfmsub231ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsub231ps(zmmA, zmmB, zmmC);
  e->vfmsub231ps(zmmA, zmmB, anyptr_gpC);
  e->vfmsub231sd(xmmA, xmmB, xmmC);
  e->vfmsub231sd(xmmA, xmmB, anyptr_gpC);
  e->vfmsub231ss(xmmA, xmmB, xmmC);
  e->vfmsub231ss(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd132pd(xmmA, xmmB, xmmC);
  e->vfmsubadd132pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd132pd(ymmA, ymmB, ymmC);
  e->vfmsubadd132pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd132pd(zmmA, zmmB, zmmC);
  e->vfmsubadd132pd(zmmA, zmmB, anyptr_gpC);
  e->vfmsubadd132ps(xmmA, xmmB, xmmC);
  e->vfmsubadd132ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd132ps(ymmA, ymmB, ymmC);
  e->vfmsubadd132ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd132ps(zmmA, zmmB, zmmC);
  e->vfmsubadd132ps(zmmA, zmmB, anyptr_gpC);
  e->vfmsubadd213pd(xmmA, xmmB, xmmC);
  e->vfmsubadd213pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd213pd(ymmA, ymmB, ymmC);
  e->vfmsubadd213pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd213pd(zmmA, zmmB, zmmC);
  e->vfmsubadd213pd(zmmA, zmmB, anyptr_gpC);
  e->vfmsubadd213ps(xmmA, xmmB, xmmC);
  e->vfmsubadd213ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd213ps(ymmA, ymmB, ymmC);
  e->vfmsubadd213ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd213ps(zmmA, zmmB, zmmC);
  e->vfmsubadd213ps(zmmA, zmmB, anyptr_gpC);
  e->vfmsubadd231pd(xmmA, xmmB, xmmC);
  e->vfmsubadd231pd(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd231pd(ymmA, ymmB, ymmC);
  e->vfmsubadd231pd(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd231pd(zmmA, zmmB, zmmC);
  e->vfmsubadd231pd(zmmA, zmmB, anyptr_gpC);
  e->vfmsubadd231ps(xmmA, xmmB, xmmC);
  e->vfmsubadd231ps(xmmA, xmmB, anyptr_gpC);
  e->vfmsubadd231ps(ymmA, ymmB, ymmC);
  e->vfmsubadd231ps(ymmA, ymmB, anyptr_gpC);
  e->vfmsubadd231ps(zmmA, zmmB, zmmC);
  e->vfmsubadd231ps(zmmA, zmmB, anyptr_gpC);
  e->vfnmadd132pd(xmmA, xmmB, xmmC);
  e->vfnmadd132pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd132pd(ymmA, ymmB, ymmC);
  e->vfnmadd132pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd132pd(zmmA, zmmB, zmmC);
  e->vfnmadd132pd(zmmA, zmmB, anyptr_gpC);
  e->vfnmadd132ps(xmmA, xmmB, xmmC);
  e->vfnmadd132ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd132ps(ymmA, ymmB, ymmC);
  e->vfnmadd132ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd132ps(zmmA, zmmB, zmmC);
  e->vfnmadd132ps(zmmA, zmmB, anyptr_gpC);
  e->vfnmadd132sd(xmmA, xmmB, xmmC);
  e->vfnmadd132sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd132ss(xmmA, xmmB, xmmC);
  e->vfnmadd132ss(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd213pd(xmmA, xmmB, xmmC);
  e->vfnmadd213pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd213pd(ymmA, ymmB, ymmC);
  e->vfnmadd213pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd213pd(zmmA, zmmB, zmmC);
  e->vfnmadd213pd(zmmA, zmmB, anyptr_gpC);
  e->vfnmadd213ps(xmmA, xmmB, xmmC);
  e->vfnmadd213ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd213ps(ymmA, ymmB, ymmC);
  e->vfnmadd213ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd213ps(zmmA, zmmB, zmmC);
  e->vfnmadd213ps(zmmA, zmmB, anyptr_gpC);
  e->vfnmadd213sd(xmmA, xmmB, xmmC);
  e->vfnmadd213sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd213ss(xmmA, xmmB, xmmC);
  e->vfnmadd213ss(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd231pd(xmmA, xmmB, xmmC);
  e->vfnmadd231pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd231pd(ymmA, ymmB, ymmC);
  e->vfnmadd231pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd231pd(zmmA, zmmB, zmmC);
  e->vfnmadd231pd(zmmA, zmmB, anyptr_gpC);
  e->vfnmadd231ps(xmmA, xmmB, xmmC);
  e->vfnmadd231ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd231ps(ymmA, ymmB, ymmC);
  e->vfnmadd231ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmadd231ps(zmmA, zmmB, zmmC);
  e->vfnmadd231ps(zmmA, zmmB, anyptr_gpC);
  e->vfnmadd231sd(xmmA, xmmB, xmmC);
  e->vfnmadd231sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmadd231ss(xmmA, xmmB, xmmC);
  e->vfnmadd231ss(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub132pd(xmmA, xmmB, xmmC);
  e->vfnmsub132pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub132pd(ymmA, ymmB, ymmC);
  e->vfnmsub132pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub132pd(zmmA, zmmB, zmmC);
  e->vfnmsub132pd(zmmA, zmmB, anyptr_gpC);
  e->vfnmsub132ps(xmmA, xmmB, xmmC);
  e->vfnmsub132ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub132ps(ymmA, ymmB, ymmC);
  e->vfnmsub132ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub132ps(zmmA, zmmB, zmmC);
  e->vfnmsub132ps(zmmA, zmmB, anyptr_gpC);
  e->vfnmsub132sd(xmmA, xmmB, xmmC);
  e->vfnmsub132sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub132ss(xmmA, xmmB, xmmC);
  e->vfnmsub132ss(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub213pd(xmmA, xmmB, xmmC);
  e->vfnmsub213pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub213pd(ymmA, ymmB, ymmC);
  e->vfnmsub213pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub213pd(zmmA, zmmB, zmmC);
  e->vfnmsub213pd(zmmA, zmmB, anyptr_gpC);
  e->vfnmsub213ps(xmmA, xmmB, xmmC);
  e->vfnmsub213ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub213ps(ymmA, ymmB, ymmC);
  e->vfnmsub213ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub213ps(zmmA, zmmB, zmmC);
  e->vfnmsub213ps(zmmA, zmmB, anyptr_gpC);
  e->vfnmsub213sd(xmmA, xmmB, xmmC);
  e->vfnmsub213sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub213ss(xmmA, xmmB, xmmC);
  e->vfnmsub213ss(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub231pd(xmmA, xmmB, xmmC);
  e->vfnmsub231pd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub231pd(ymmA, ymmB, ymmC);
  e->vfnmsub231pd(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub231pd(zmmA, zmmB, zmmC);
  e->vfnmsub231pd(zmmA, zmmB, anyptr_gpC);
  e->vfnmsub231ps(xmmA, xmmB, xmmC);
  e->vfnmsub231ps(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub231ps(ymmA, ymmB, ymmC);
  e->vfnmsub231ps(ymmA, ymmB, anyptr_gpC);
  e->vfnmsub231ps(zmmA, zmmB, zmmC);
  e->vfnmsub231ps(zmmA, zmmB, anyptr_gpC);
  e->vfnmsub231sd(xmmA, xmmB, xmmC);
  e->vfnmsub231sd(xmmA, xmmB, anyptr_gpC);
  e->vfnmsub231ss(xmmA, xmmB, xmmC);
  e->vfnmsub231ss(xmmA, xmmB, anyptr_gpC);
  // VFPCLASS{PD|PS|SD|SS} - every call passes `kA` as the first operand and an
  // immediate of 0 as the last one. The packed variants list a register form for
  // XMM, YMM and ZMM, each followed by a memory form.
  // NOTE(review): the memory form is the very same call for all three widths;
  // presumably `anyptr_gpB` carries no operand size, so the width cannot be told
  // apart here - confirm this repetition is intended.
  e->vfpclasspd(kA, xmmB, 0);
  e->vfpclasspd(kA, anyptr_gpB, 0);
  e->vfpclasspd(kA, ymmB, 0);
  e->vfpclasspd(kA, anyptr_gpB, 0);
  e->vfpclasspd(kA, zmmB, 0);
  e->vfpclasspd(kA, anyptr_gpB, 0);
  e->vfpclassps(kA, xmmB, 0);
  e->vfpclassps(kA, anyptr_gpB, 0);
  e->vfpclassps(kA, ymmB, 0);
  e->vfpclassps(kA, anyptr_gpB, 0);
  e->vfpclassps(kA, zmmB, 0);
  e->vfpclassps(kA, anyptr_gpB, 0);
  // Scalar variants only have the XMM register form and the memory form.
  e->vfpclasssd(kA, xmmB, 0);
  e->vfpclasssd(kA, anyptr_gpB, 0);
  e->vfpclassss(kA, xmmB, 0);
  e->vfpclassss(kA, anyptr_gpB, 0);
  e->vgatherdpd(xmmA, vx_ptr);
  e->vgatherdpd(ymmA, vy_ptr);
  e->vgatherdpd(zmmA, vz_ptr);
  e->vgatherdps(xmmA, vx_ptr);
  e->vgatherdps(ymmA, vy_ptr);
  e->vgatherdps(zmmA, vz_ptr);
  // VGATHERPF{0|1}{DPD|DPS|QPD|QPS} - each call takes a single vector-indexed
  // memory operand. The `dpd` variants are given `vy_ptr`, all other variants are
  // given `vz_ptr`.
  // NOTE(review): presumably `dpd` needs only a YMM index (dword indices for
  // qword elements) - confirm against the instruction reference.
  e->vgatherpf0dpd(vy_ptr);
  e->vgatherpf0dps(vz_ptr);
  e->vgatherpf0qpd(vz_ptr);
  e->vgatherpf0qps(vz_ptr);
  e->vgatherpf1dpd(vy_ptr);
  e->vgatherpf1dps(vz_ptr);
  e->vgatherpf1qpd(vz_ptr);
  e->vgatherpf1qps(vz_ptr);
  e->vgatherqpd(xmmA, vx_ptr);
  e->vgatherqpd(ymmA, vy_ptr);
  e->vgatherqpd(zmmA, vz_ptr);
  e->vgatherqps(xmmA, vx_ptr);
  e->vgatherqps(ymmA, vy_ptr);
  e->vgatherqps(zmmA, vz_ptr);
  e->vgetexppd(xmmA, xmmB);
  e->vgetexppd(xmmA, anyptr_gpB);
  e->vgetexppd(ymmA, ymmB);
  e->vgetexppd(ymmA, anyptr_gpB);
  e->vgetexppd(zmmA, zmmB);
  e->vgetexppd(zmmA, anyptr_gpB);
  e->vgetexpps(xmmA, xmmB);
  e->vgetexpps(xmmA, anyptr_gpB);
  e->vgetexpps(ymmA, ymmB);
  e->vgetexpps(ymmA, anyptr_gpB);
  e->vgetexpps(zmmA, zmmB);
  e->vgetexpps(zmmA, anyptr_gpB);
  e->vgetexpsd(xmmA, xmmB, xmmC);
  e->vgetexpsd(xmmA, xmmB, anyptr_gpB);
  e->vgetexpss(xmmA, xmmB, xmmC);
  e->vgetexpss(xmmA, xmmB, anyptr_gpB);
  e->vgetmantpd(xmmA, xmmB, 0);
  e->vgetmantpd(xmmA, anyptr_gpB, 0);
  e->vgetmantpd(ymmA, ymmB, 0);
  e->vgetmantpd(ymmA, anyptr_gpB, 0);
  e->vgetmantpd(zmmA, zmmB, 0);
  e->vgetmantpd(zmmA, anyptr_gpB, 0);
  e->vgetmantps(xmmA, xmmB, 0);
  e->vgetmantps(xmmA, anyptr_gpB, 0);
  e->vgetmantps(ymmA, ymmB, 0);
  e->vgetmantps(ymmA, anyptr_gpB, 0);
  e->vgetmantps(zmmA, zmmB, 0);
  e->vgetmantps(zmmA, anyptr_gpB, 0);
  e->vgetmantsd(xmmA, xmmB, xmmC, 0);
  e->vgetmantsd(xmmA, xmmB, anyptr_gpB, 0);
  e->vgetmantss(xmmA, xmmB, xmmC, 0);
  e->vgetmantss(xmmA, xmmB, anyptr_gpB, 0);
  e->vinsertf32x4(ymmA, ymmB, xmmC, 0);
  e->vinsertf32x4(ymmA, ymmB, anyptr_gpC, 0);
  e->vinsertf32x4(zmmA, zmmB, xmmC, 0);
  e->vinsertf32x4(zmmA, zmmB, anyptr_gpC, 0);
  e->vinsertf32x8(zmmA, zmmB, ymmC, 0);
  e->vinsertf32x8(zmmA, zmmB, anyptr_gpC, 0);
  e->vinsertf64x2(ymmA, ymmB, xmmC, 0);
  e->vinsertf64x2(ymmA, ymmB, anyptr_gpC, 0);
  e->vinsertf64x2(zmmA, zmmB, xmmC, 0);
  e->vinsertf64x2(zmmA, zmmB, anyptr_gpC, 0);
  e->vinsertf64x4(zmmA, zmmB, ymmC, 0);
  e->vinsertf64x4(zmmA, zmmB, anyptr_gpC, 0);
  e->vinserti32x4(ymmA, ymmB, xmmC, 0);
  e->vinserti32x4(ymmA, ymmB, anyptr_gpC, 0);
  e->vinserti32x4(zmmA, zmmB, xmmC, 0);
  e->vinserti32x4(zmmA, zmmB, anyptr_gpC, 0);
  e->vinserti32x8(zmmA, zmmB, ymmC, 0);
  e->vinserti32x8(zmmA, zmmB, anyptr_gpC, 0);
  e->vinserti64x2(ymmA, ymmB, xmmC, 0);
  e->vinserti64x2(ymmA, ymmB, anyptr_gpC, 0);
  e->vinserti64x2(zmmA, zmmB, xmmC, 0);
  e->vinserti64x2(zmmA, zmmB, anyptr_gpC, 0);
  e->vinserti64x4(zmmA, zmmB, ymmC, 0);
  e->vinserti64x4(zmmA, zmmB, anyptr_gpC, 0);
  e->vinsertps(xmmA, xmmB, xmmC, 0);
  e->vinsertps(xmmA, xmmB, anyptr_gpC, 0);
  e->vmaxpd(xmmA, xmmB, xmmC);
  e->vmaxpd(xmmA, xmmB, anyptr_gpC);
  e->vmaxpd(ymmA, ymmB, ymmC);
  e->vmaxpd(ymmA, ymmB, anyptr_gpC);
  e->vmaxpd(zmmA, zmmB, zmmC);
  e->vmaxpd(zmmA, zmmB, anyptr_gpC);
  e->vmaxps(xmmA, xmmB, xmmC);
  e->vmaxps(xmmA, xmmB, anyptr_gpC);
  e->vmaxps(ymmA, ymmB, ymmC);
  e->vmaxps(ymmA, ymmB, anyptr_gpC);
  e->vmaxps(zmmA, zmmB, zmmC);
  e->vmaxps(zmmA, zmmB, anyptr_gpC);
  e->vmaxsd(xmmA, xmmB, xmmC);
  e->vmaxsd(xmmA, xmmB, anyptr_gpC);
  e->vmaxss(xmmA, xmmB, xmmC);
  e->vmaxss(xmmA, xmmB, anyptr_gpC);
  e->vminpd(xmmA, xmmB, xmmC);
  e->vminpd(xmmA, xmmB, anyptr_gpC);
  e->vminpd(ymmA, ymmB, ymmC);
  e->vminpd(ymmA, ymmB, anyptr_gpC);
  e->vminpd(zmmA, zmmB, zmmC);
  e->vminpd(zmmA, zmmB, anyptr_gpC);
  e->vminps(xmmA, xmmB, xmmC);
  e->vminps(xmmA, xmmB, anyptr_gpC);
  e->vminps(ymmA, ymmB, ymmC);
  e->vminps(ymmA, ymmB, anyptr_gpC);
  e->vminps(zmmA, zmmB, zmmC);
  e->vminps(zmmA, zmmB, anyptr_gpC);
  e->vminsd(xmmA, xmmB, xmmC);
  e->vminsd(xmmA, xmmB, anyptr_gpC);
  e->vminss(xmmA, xmmB, xmmC);
  e->vminss(xmmA, xmmB, anyptr_gpC);
  // VMOVAPD - for each vector width (XMM, YMM, ZMM) four calls are listed:
  // (reg, reg), (reg, mem), (reg, reg) once more, and (mem, reg).
  // NOTE(review): the (reg, reg) call is emitted twice per width; presumably the
  // list was generated from both the load-form and the store-form signature of
  // the instruction, which coincide for register operands - confirm.
  e->vmovapd(xmmA, xmmB);
  e->vmovapd(xmmA, anyptr_gpB);
  e->vmovapd(xmmA, xmmB);
  e->vmovapd(anyptr_gpA, xmmB);
  e->vmovapd(ymmA, ymmB);
  e->vmovapd(ymmA, anyptr_gpB);
  e->vmovapd(ymmA, ymmB);
  e->vmovapd(anyptr_gpA, ymmB);
  e->vmovapd(zmmA, zmmB);
  e->vmovapd(zmmA, anyptr_gpB);
  e->vmovapd(zmmA, zmmB);
  e->vmovapd(anyptr_gpA, zmmB);
  e->vmovaps(xmmA, xmmB);
  e->vmovaps(xmmA, anyptr_gpB);
  e->vmovaps(xmmA, xmmB);
  e->vmovaps(anyptr_gpA, xmmB);
  e->vmovaps(ymmA, ymmB);
  e->vmovaps(ymmA, anyptr_gpB);
  e->vmovaps(ymmA, ymmB);
  e->vmovaps(anyptr_gpA, ymmB);
  e->vmovaps(zmmA, zmmB);
  e->vmovaps(zmmA, anyptr_gpB);
  e->vmovaps(zmmA, zmmB);
  e->vmovaps(anyptr_gpA, zmmB);
  e->vmovd(gdA, xmmB);
  e->vmovd(gzA, xmmB);
  e->vmovd(anyptr_gpA, xmmB);
  e->vmovd(xmmA, gdB);
  e->vmovd(xmmA, gzB);
  e->vmovd(xmmA, anyptr_gpB);
  e->vmovddup(xmmA, xmmB);
  e->vmovddup(xmmA, anyptr_gpB);
  e->vmovddup(ymmA, ymmB);
  e->vmovddup(ymmA, anyptr_gpB);
  e->vmovddup(zmmA, zmmB);
  e->vmovddup(zmmA, anyptr_gpB);
  e->vmovdqa32(xmmA, xmmB);
  e->vmovdqa32(xmmA, anyptr_gpB);
  e->vmovdqa32(xmmA, xmmB);
  e->vmovdqa32(anyptr_gpA, xmmB);
  e->vmovdqa32(ymmA, ymmB);
  e->vmovdqa32(ymmA, anyptr_gpB);
  e->vmovdqa32(ymmA, ymmB);
  e->vmovdqa32(anyptr_gpA, ymmB);
  e->vmovdqa32(zmmA, zmmB);
  e->vmovdqa32(zmmA, anyptr_gpB);
  e->vmovdqa32(zmmA, zmmB);
  e->vmovdqa32(anyptr_gpA, zmmB);
  e->vmovdqa64(xmmA, xmmB);
  e->vmovdqa64(xmmA, anyptr_gpB);
  e->vmovdqa64(xmmA, xmmB);
  e->vmovdqa64(anyptr_gpA, xmmB);
  e->vmovdqa64(ymmA, ymmB);
  e->vmovdqa64(ymmA, anyptr_gpB);
  e->vmovdqa64(ymmA, ymmB);
  e->vmovdqa64(anyptr_gpA, ymmB);
  e->vmovdqa64(zmmA, zmmB);
  e->vmovdqa64(zmmA, anyptr_gpB);
  e->vmovdqa64(zmmA, zmmB);
  e->vmovdqa64(anyptr_gpA, zmmB);
  e->vmovdqu16(xmmA, xmmB);
  e->vmovdqu16(xmmA, anyptr_gpB);
  e->vmovdqu16(xmmA, xmmB);
  e->vmovdqu16(anyptr_gpA, xmmB);
  e->vmovdqu16(ymmA, ymmB);
  e->vmovdqu16(ymmA, anyptr_gpB);
  e->vmovdqu16(ymmA, ymmB);
  e->vmovdqu16(anyptr_gpA, ymmB);
  e->vmovdqu16(zmmA, zmmB);
  e->vmovdqu16(zmmA, anyptr_gpB);
  e->vmovdqu16(zmmA, zmmB);
  e->vmovdqu16(anyptr_gpA, zmmB);
  e->vmovdqu32(xmmA, xmmB);
  e->vmovdqu32(xmmA, anyptr_gpB);
  e->vmovdqu32(xmmA, xmmB);
  e->vmovdqu32(anyptr_gpA, xmmB);
  e->vmovdqu32(ymmA, ymmB);
  e->vmovdqu32(ymmA, anyptr_gpB);
  e->vmovdqu32(ymmA, ymmB);
  e->vmovdqu32(anyptr_gpA, ymmB);
  e->vmovdqu32(zmmA, zmmB);
  e->vmovdqu32(zmmA, anyptr_gpB);
  e->vmovdqu32(zmmA, zmmB);
  e->vmovdqu32(anyptr_gpA, zmmB);
  e->vmovdqu64(xmmA, xmmB);
  e->vmovdqu64(xmmA, anyptr_gpB);
  e->vmovdqu64(xmmA, xmmB);
  e->vmovdqu64(anyptr_gpA, xmmB);
  e->vmovdqu64(ymmA, ymmB);
  e->vmovdqu64(ymmA, anyptr_gpB);
  e->vmovdqu64(ymmA, ymmB);
  e->vmovdqu64(anyptr_gpA, ymmB);
  e->vmovdqu64(zmmA, zmmB);
  e->vmovdqu64(zmmA, anyptr_gpB);
  e->vmovdqu64(zmmA, zmmB);
  e->vmovdqu64(anyptr_gpA, zmmB);
  e->vmovdqu8(xmmA, xmmB);
  e->vmovdqu8(xmmA, anyptr_gpB);
  e->vmovdqu8(xmmA, xmmB);
  e->vmovdqu8(anyptr_gpA, xmmB);
  e->vmovdqu8(ymmA, ymmB);
  e->vmovdqu8(ymmA, anyptr_gpB);
  e->vmovdqu8(ymmA, ymmB);
  e->vmovdqu8(anyptr_gpA, ymmB);
  e->vmovdqu8(zmmA, zmmB);
  e->vmovdqu8(zmmA, anyptr_gpB);
  e->vmovdqu8(zmmA, zmmB);
  e->vmovdqu8(anyptr_gpA, zmmB);
  e->vmovhlps(xmmA, xmmB, xmmC);
  e->vmovhpd(anyptr_gpA, xmmB);
  e->vmovhpd(xmmA, xmmB, anyptr_gpC);
  e->vmovhps(anyptr_gpA, xmmB);
  e->vmovhps(xmmA, xmmB, anyptr_gpC);
  e->vmovlhps(xmmA, xmmB, xmmC);
  e->vmovlpd(anyptr_gpA, xmmB);
  e->vmovlpd(xmmA, xmmB, anyptr_gpC);
  e->vmovlps(anyptr_gpA, xmmB);
  e->vmovlps(xmmA, xmmB, anyptr_gpC);
  e->vmovntdq(anyptr_gpA, xmmB);
  e->vmovntdq(anyptr_gpA, ymmB);
  e->vmovntdq(anyptr_gpA, zmmB);
  e->vmovntdqa(xmmA, anyptr_gpB);
  e->vmovntdqa(ymmA, anyptr_gpB);
  e->vmovntdqa(zmmA, anyptr_gpB);
  e->vmovntpd(anyptr_gpA, xmmB);
  e->vmovntpd(anyptr_gpA, ymmB);
  e->vmovntpd(anyptr_gpA, zmmB);
  e->vmovntps(anyptr_gpA, xmmB);
  e->vmovntps(anyptr_gpA, ymmB);
  e->vmovntps(anyptr_gpA, zmmB);
  // VMOVQ - the two forms that take a `gz` general-purpose register are emitted
  // only when the emitter is 64-bit (`isX64`).
  if (isX64) e->vmovq(gzA, xmmB);
  if (isX64) e->vmovq(xmmA, gzB);
  // Memory and XMM forms are emitted unconditionally. The last four calls repeat
  // operand combinations that already appear above in this group
  // (NOTE(review): presumably generated from separate signature entries - confirm).
  e->vmovq(anyptr_gpA, xmmB);
  e->vmovq(xmmA, anyptr_gpB);
  e->vmovq(xmmA, xmmB);
  e->vmovq(xmmA, anyptr_gpB);
  e->vmovq(xmmA, xmmB);
  e->vmovq(anyptr_gpA, xmmB);
  e->vmovsd(anyptr_gpA, xmmB);
  e->vmovsd(xmmA, anyptr_gpB);
  e->vmovsd(xmmA, xmmB, xmmC);
  e->vmovsd(xmmA, xmmB, xmmC);
  e->vmovshdup(xmmA, xmmB);
  e->vmovshdup(xmmA, anyptr_gpB);
  e->vmovshdup(ymmA, ymmB);
  e->vmovshdup(ymmA, anyptr_gpB);
  e->vmovshdup(zmmA, zmmB);
  e->vmovshdup(zmmA, anyptr_gpB);
  e->vmovsldup(xmmA, xmmB);
  e->vmovsldup(xmmA, anyptr_gpB);
  e->vmovsldup(ymmA, ymmB);
  e->vmovsldup(ymmA, anyptr_gpB);
  e->vmovsldup(zmmA, zmmB);
  e->vmovsldup(zmmA, anyptr_gpB);
  e->vmovss(anyptr_gpA, xmmB);
  e->vmovss(xmmA, anyptr_gpB);
  e->vmovss(xmmA, xmmB, xmmC);
  e->vmovss(xmmA, xmmB, xmmC);
  e->vmovupd(xmmA, xmmB);
  e->vmovupd(xmmA, anyptr_gpB);
  e->vmovupd(xmmA, xmmB);
  e->vmovupd(anyptr_gpA, xmmB);
  e->vmovupd(ymmA, ymmB);
  e->vmovupd(ymmA, anyptr_gpB);
  e->vmovupd(ymmA, ymmB);
  e->vmovupd(anyptr_gpA, ymmB);
  e->vmovupd(zmmA, zmmB);
  e->vmovupd(zmmA, anyptr_gpB);
  e->vmovupd(zmmA, zmmB);
  e->vmovupd(anyptr_gpA, zmmB);
  e->vmovups(xmmA, xmmB);
  e->vmovups(xmmA, anyptr_gpB);
  e->vmovups(xmmA, xmmB);
  e->vmovups(anyptr_gpA, xmmB);
  e->vmovups(ymmA, ymmB);
  e->vmovups(ymmA, anyptr_gpB);
  e->vmovups(ymmA, ymmB);
  e->vmovups(anyptr_gpA, ymmB);
  e->vmovups(zmmA, zmmB);
  e->vmovups(zmmA, anyptr_gpB);
  e->vmovups(zmmA, zmmB);
  e->vmovups(anyptr_gpA, zmmB);
  e->vmulpd(xmmA, xmmB, xmmC);
  e->vmulpd(xmmA, xmmB, anyptr_gpC);
  e->vmulpd(ymmA, ymmB, ymmC);
  e->vmulpd(ymmA, ymmB, anyptr_gpC);
  e->vmulpd(zmmA, zmmB, zmmC);
  e->vmulpd(zmmA, zmmB, anyptr_gpC);
  e->vmulps(xmmA, xmmB, xmmC);
  e->vmulps(xmmA, xmmB, anyptr_gpC);
  e->vmulps(ymmA, ymmB, ymmC);
  e->vmulps(ymmA, ymmB, anyptr_gpC);
  e->vmulps(zmmA, zmmB, zmmC);
  e->vmulps(zmmA, zmmB, anyptr_gpC);
  e->vmulsd(xmmA, xmmB, xmmC);
  e->vmulsd(xmmA, xmmB, anyptr_gpC);
  e->vmulss(xmmA, xmmB, xmmC);
  e->vmulss(xmmA, xmmB, anyptr_gpC);
  e->vorpd(xmmA, xmmB, xmmC);
  e->vorpd(xmmA, xmmB, anyptr_gpC);
  e->vorpd(ymmA, ymmB, ymmC);
  e->vorpd(ymmA, ymmB, anyptr_gpC);
  e->vorpd(zmmA, zmmB, zmmC);
  e->vorpd(zmmA, zmmB, anyptr_gpC);
  e->vorps(xmmA, xmmB, xmmC);
  e->vorps(xmmA, xmmB, anyptr_gpC);
  e->vorps(ymmA, ymmB, ymmC);
  e->vorps(ymmA, ymmB, anyptr_gpC);
  e->vorps(zmmA, zmmB, zmmC);
  e->vorps(zmmA, zmmB, anyptr_gpC);
  e->vpabsb(xmmA, xmmB);
  e->vpabsb(xmmA, anyptr_gpB);
  e->vpabsb(ymmA, ymmB);
  e->vpabsb(ymmA, anyptr_gpB);
  e->vpabsb(zmmA, zmmB);
  e->vpabsb(zmmA, anyptr_gpB);
  e->vpabsd(xmmA, xmmB);
  e->vpabsd(xmmA, anyptr_gpB);
  e->vpabsd(ymmA, ymmB);
  e->vpabsd(ymmA, anyptr_gpB);
  e->vpabsd(zmmA, zmmB);
  e->vpabsd(zmmA, anyptr_gpB);
  e->vpabsq(xmmA, xmmB);
  e->vpabsq(xmmA, anyptr_gpB);
  e->vpabsq(ymmA, ymmB);
  e->vpabsq(ymmA, anyptr_gpB);
  e->vpabsq(zmmA, zmmB);
  e->vpabsq(zmmA, anyptr_gpB);
  e->vpabsw(xmmA, xmmB);
  e->vpabsw(xmmA, anyptr_gpB);
  e->vpabsw(ymmA, ymmB);
  e->vpabsw(ymmA, anyptr_gpB);
  e->vpabsw(zmmA, zmmB);
  e->vpabsw(zmmA, anyptr_gpB);
  e->vpackssdw(xmmA, xmmB, xmmC);
  e->vpackssdw(xmmA, xmmB, anyptr_gpC);
  e->vpackssdw(ymmA, ymmB, ymmC);
  e->vpackssdw(ymmA, ymmB, anyptr_gpC);
  e->vpackssdw(zmmA, zmmB, zmmC);
  e->vpackssdw(zmmA, zmmB, anyptr_gpC);
  e->vpacksswb(xmmA, xmmB, xmmC);
  e->vpacksswb(xmmA, xmmB, anyptr_gpC);
  e->vpacksswb(ymmA, ymmB, ymmC);
  e->vpacksswb(ymmA, ymmB, anyptr_gpC);
  e->vpacksswb(zmmA, zmmB, zmmC);
  e->vpacksswb(zmmA, zmmB, anyptr_gpC);
  e->vpackusdw(xmmA, xmmB, xmmC);
  e->vpackusdw(xmmA, xmmB, anyptr_gpC);
  e->vpackusdw(ymmA, ymmB, ymmC);
  e->vpackusdw(ymmA, ymmB, anyptr_gpC);
  e->vpackusdw(zmmA, zmmB, zmmC);
  e->vpackusdw(zmmA, zmmB, anyptr_gpC);
  e->vpackuswb(xmmA, xmmB, xmmC);
  e->vpackuswb(xmmA, xmmB, anyptr_gpC);
  e->vpackuswb(ymmA, ymmB, ymmC);
  e->vpackuswb(ymmA, ymmB, anyptr_gpC);
  e->vpackuswb(zmmA, zmmB, zmmC);
  e->vpackuswb(zmmA, zmmB, anyptr_gpC);
  e->vpaddb(xmmA, xmmB, xmmC);
  e->vpaddb(xmmA, xmmB, anyptr_gpC);
  e->vpaddb(ymmA, ymmB, ymmC);
  e->vpaddb(ymmA, ymmB, anyptr_gpC);
  e->vpaddb(zmmA, zmmB, zmmC);
  e->vpaddb(zmmA, zmmB, anyptr_gpC);
  e->vpaddd(xmmA, xmmB, xmmC);
  e->vpaddd(xmmA, xmmB, anyptr_gpC);
  e->vpaddd(ymmA, ymmB, ymmC);
  e->vpaddd(ymmA, ymmB, anyptr_gpC);
  e->vpaddd(zmmA, zmmB, zmmC);
  e->vpaddd(zmmA, zmmB, anyptr_gpC);
  e->vpaddq(xmmA, xmmB, xmmC);
  e->vpaddq(xmmA, xmmB, anyptr_gpC);
  e->vpaddq(ymmA, ymmB, ymmC);
  e->vpaddq(ymmA, ymmB, anyptr_gpC);
  e->vpaddq(zmmA, zmmB, zmmC);
  e->vpaddq(zmmA, zmmB, anyptr_gpC);
  e->vpaddsb(xmmA, xmmB, xmmC);
  e->vpaddsb(xmmA, xmmB, anyptr_gpC);
  e->vpaddsb(ymmA, ymmB, ymmC);
  e->vpaddsb(ymmA, ymmB, anyptr_gpC);
  e->vpaddsb(zmmA, zmmB, zmmC);
  e->vpaddsb(zmmA, zmmB, anyptr_gpC);
  e->vpaddsw(xmmA, xmmB, xmmC);
  e->vpaddsw(xmmA, xmmB, anyptr_gpC);
  e->vpaddsw(ymmA, ymmB, ymmC);
  e->vpaddsw(ymmA, ymmB, anyptr_gpC);
  e->vpaddsw(zmmA, zmmB, zmmC);
  e->vpaddsw(zmmA, zmmB, anyptr_gpC);
  e->vpaddusb(xmmA, xmmB, xmmC);
  e->vpaddusb(xmmA, xmmB, anyptr_gpC);
  e->vpaddusb(ymmA, ymmB, ymmC);
  e->vpaddusb(ymmA, ymmB, anyptr_gpC);
  e->vpaddusb(zmmA, zmmB, zmmC);
  e->vpaddusb(zmmA, zmmB, anyptr_gpC);
  e->vpaddusw(xmmA, xmmB, xmmC);
  e->vpaddusw(xmmA, xmmB, anyptr_gpC);
  e->vpaddusw(ymmA, ymmB, ymmC);
  e->vpaddusw(ymmA, ymmB, anyptr_gpC);
  e->vpaddusw(zmmA, zmmB, zmmC);
  e->vpaddusw(zmmA, zmmB, anyptr_gpC);
  e->vpaddw(xmmA, xmmB, xmmC);
  e->vpaddw(xmmA, xmmB, anyptr_gpC);
  e->vpaddw(ymmA, ymmB, ymmC);
  e->vpaddw(ymmA, ymmB, anyptr_gpC);
  e->vpaddw(zmmA, zmmB, zmmC);
  e->vpaddw(zmmA, zmmB, anyptr_gpC);
  e->vpalignr(xmmA, xmmB, xmmC, 0);
  e->vpalignr(xmmA, xmmB, anyptr_gpC, 0);
  e->vpalignr(ymmA, ymmB, ymmC, 0);
  e->vpalignr(ymmA, ymmB, anyptr_gpC, 0);
  e->vpalignr(zmmA, zmmB, zmmC, 0);
  e->vpalignr(zmmA, zmmB, anyptr_gpC, 0);
  e->vpandd(xmmA, xmmB, xmmC);
  e->vpandd(xmmA, xmmB, anyptr_gpC);
  e->vpandd(ymmA, ymmB, ymmC);
  e->vpandd(ymmA, ymmB, anyptr_gpC);
  e->vpandd(zmmA, zmmB, zmmC);
  e->vpandd(zmmA, zmmB, anyptr_gpC);
  e->vpandnd(xmmA, xmmB, xmmC);
  e->vpandnd(xmmA, xmmB, anyptr_gpC);
  e->vpandnd(ymmA, ymmB, ymmC);
  e->vpandnd(ymmA, ymmB, anyptr_gpC);
  e->vpandnd(zmmA, zmmB, zmmC);
  e->vpandnd(zmmA, zmmB, anyptr_gpC);
  e->vpandnq(xmmA, xmmB, xmmC);
  e->vpandnq(xmmA, xmmB, anyptr_gpC);
  e->vpandnq(ymmA, ymmB, ymmC);
  e->vpandnq(ymmA, ymmB, anyptr_gpC);
  e->vpandnq(zmmA, zmmB, zmmC);
  e->vpandnq(zmmA, zmmB, anyptr_gpC);
  e->vpandq(xmmA, xmmB, xmmC);
  e->vpandq(xmmA, xmmB, anyptr_gpC);
  e->vpandq(ymmA, ymmB, ymmC);
  e->vpandq(ymmA, ymmB, anyptr_gpC);
  e->vpandq(zmmA, zmmB, zmmC);
  e->vpandq(zmmA, zmmB, anyptr_gpC);
  e->vpavgb(xmmA, xmmB, xmmC);
  e->vpavgb(xmmA, xmmB, anyptr_gpC);
  e->vpavgb(ymmA, ymmB, ymmC);
  e->vpavgb(ymmA, ymmB, anyptr_gpC);
  e->vpavgb(zmmA, zmmB, zmmC);
  e->vpavgb(zmmA, zmmB, anyptr_gpC);
  e->vpavgw(xmmA, xmmB, xmmC);
  e->vpavgw(xmmA, xmmB, anyptr_gpC);
  e->vpavgw(ymmA, ymmB, ymmC);
  e->vpavgw(ymmA, ymmB, anyptr_gpC);
  e->vpavgw(zmmA, zmmB, zmmC);
  e->vpavgw(zmmA, zmmB, anyptr_gpC);
  e->vpbroadcastb(xmmA, gdB);
  e->vpbroadcastb(xmmA, gzB);
  e->vpbroadcastb(xmmA, xmmB);
  e->vpbroadcastb(xmmA, anyptr_gpB);
  e->vpbroadcastb(ymmA, gdB);
  e->vpbroadcastb(ymmA, gzB);
  e->vpbroadcastb(ymmA, xmmB);
  e->vpbroadcastb(ymmA, anyptr_gpB);
  e->vpbroadcastb(zmmA, gdB);
  e->vpbroadcastb(zmmA, gzB);
  e->vpbroadcastb(zmmA, xmmB);
  e->vpbroadcastb(zmmA, anyptr_gpB);
  e->vpbroadcastd(xmmA, gdB);
  e->vpbroadcastd(xmmA, gzB);
  e->vpbroadcastd(xmmA, xmmB);
  e->vpbroadcastd(xmmA, anyptr_gpB);
  e->vpbroadcastd(ymmA, gdB);
  e->vpbroadcastd(ymmA, gzB);
  e->vpbroadcastd(ymmA, xmmB);
  e->vpbroadcastd(ymmA, anyptr_gpB);
  e->vpbroadcastd(zmmA, gdB);
  e->vpbroadcastd(zmmA, gzB);
  e->vpbroadcastd(zmmA, xmmB);
  e->vpbroadcastd(zmmA, anyptr_gpB);
  e->vpbroadcastmb2d(xmmA, kB);
  e->vpbroadcastmb2d(ymmA, kB);
  e->vpbroadcastmb2d(zmmA, kB);
  e->vpbroadcastmb2q(xmmA, kB);
  e->vpbroadcastmb2q(ymmA, kB);
  e->vpbroadcastmb2q(zmmA, kB);
  // VPBROADCASTQ - for each destination width the `gzB` register source is emitted
  // only when the emitter is 64-bit (`isX64`); the XMM and memory sources are
  // emitted unconditionally. Unlike the neighbouring b/d/w broadcasts there is no
  // `gdB` source in this group.
  if (isX64) e->vpbroadcastq(xmmA, gzB);
  e->vpbroadcastq(xmmA, xmmB);
  e->vpbroadcastq(xmmA, anyptr_gpB);
  if (isX64) e->vpbroadcastq(ymmA, gzB);
  e->vpbroadcastq(ymmA, xmmB);
  e->vpbroadcastq(ymmA, anyptr_gpB);
  if (isX64) e->vpbroadcastq(zmmA, gzB);
  e->vpbroadcastq(zmmA, xmmB);
  e->vpbroadcastq(zmmA, anyptr_gpB);
  e->vpbroadcastw(xmmA, gdB);
  e->vpbroadcastw(xmmA, gzB);
  e->vpbroadcastw(xmmA, xmmB);
  e->vpbroadcastw(xmmA, anyptr_gpB);
  e->vpbroadcastw(ymmA, gdB);
  e->vpbroadcastw(ymmA, gzB);
  e->vpbroadcastw(ymmA, xmmB);
  e->vpbroadcastw(ymmA, anyptr_gpB);
  e->vpbroadcastw(zmmA, gdB);
  e->vpbroadcastw(zmmA, gzB);
  e->vpbroadcastw(zmmA, xmmB);
  e->vpbroadcastw(zmmA, anyptr_gpB);
  e->vpcmpb(kA, xmmB, xmmC, 0);
  e->vpcmpb(kA, xmmB, anyptr_gpC, 0);
  e->vpcmpb(kA, ymmB, ymmC, 0);
  e->vpcmpb(kA, ymmB, anyptr_gpC, 0);
  e->vpcmpb(kA, zmmB, zmmC, 0);
  e->vpcmpb(kA, zmmB, anyptr_gpC, 0);
  e->vpcmpd(kA, xmmB, xmmC, 0);
  e->vpcmpd(kA, xmmB, anyptr_gpC, 0);
  e->vpcmpd(kA, ymmB, ymmC, 0);
  e->vpcmpd(kA, ymmB, anyptr_gpC, 0);
  e->vpcmpd(kA, zmmB, zmmC, 0);
  e->vpcmpd(kA, zmmB, anyptr_gpC, 0);
  e->vpcmpeqb(kA, xmmB, xmmC);
  e->vpcmpeqb(kA, xmmB, anyptr_gpC);
  e->vpcmpeqb(kA, ymmB, ymmC);
  e->vpcmpeqb(kA, ymmB, anyptr_gpC);
  e->vpcmpeqb(kA, zmmB, zmmC);
  e->vpcmpeqb(kA, zmmB, anyptr_gpC);
  e->vpcmpeqd(kA, xmmB, xmmC);
  e->vpcmpeqd(kA, xmmB, anyptr_gpC);
  e->vpcmpeqd(kA, ymmB, ymmC);
  e->vpcmpeqd(kA, ymmB, anyptr_gpC);
  e->vpcmpeqd(kA, zmmB, zmmC);
  e->vpcmpeqd(kA, zmmB, anyptr_gpC);
  e->vpcmpeqq(kA, xmmB, xmmC);
  e->vpcmpeqq(kA, xmmB, anyptr_gpC);
  e->vpcmpeqq(kA, ymmB, ymmC);
  e->vpcmpeqq(kA, ymmB, anyptr_gpC);
  e->vpcmpeqq(kA, zmmB, zmmC);
  e->vpcmpeqq(kA, zmmB, anyptr_gpC);
  e->vpcmpeqw(kA, xmmB, xmmC);
  e->vpcmpeqw(kA, xmmB, anyptr_gpC);
  e->vpcmpeqw(kA, ymmB, ymmC);
  e->vpcmpeqw(kA, ymmB, anyptr_gpC);
  e->vpcmpeqw(kA, zmmB, zmmC);
  e->vpcmpeqw(kA, zmmB, anyptr_gpC);
  e->vpcmpgtb(kA, xmmB, xmmC);
  e->vpcmpgtb(kA, xmmB, anyptr_gpC);
  e->vpcmpgtb(kA, ymmB, ymmC);
  e->vpcmpgtb(kA, ymmB, anyptr_gpC);
  e->vpcmpgtb(kA, zmmB, zmmC);
  e->vpcmpgtb(kA, zmmB, anyptr_gpC);
  e->vpcmpgtd(kA, xmmB, xmmC);
  e->vpcmpgtd(kA, xmmB, anyptr_gpC);
  e->vpcmpgtd(kA, ymmB, ymmC);
  e->vpcmpgtd(kA, ymmB, anyptr_gpC);
  e->vpcmpgtd(kA, zmmB, zmmC);
  e->vpcmpgtd(kA, zmmB, anyptr_gpC);
  e->vpcmpgtq(kA, xmmB, xmmC);
  e->vpcmpgtq(kA, xmmB, anyptr_gpC);
  e->vpcmpgtq(kA, ymmB, ymmC);
  e->vpcmpgtq(kA, ymmB, anyptr_gpC);
  e->vpcmpgtq(kA, zmmB, zmmC);
  e->vpcmpgtq(kA, zmmB, anyptr_gpC);
  e->vpcmpgtw(kA, xmmB, xmmC);
  e->vpcmpgtw(kA, xmmB, anyptr_gpC);
  e->vpcmpgtw(kA, ymmB, ymmC);
  e->vpcmpgtw(kA, ymmB, anyptr_gpC);
  e->vpcmpgtw(kA, zmmB, zmmC);
  e->vpcmpgtw(kA, zmmB, anyptr_gpC);
  e->vpcmpq(kA, xmmB, xmmC, 0);
  e->vpcmpq(kA, xmmB, anyptr_gpC, 0);
  e->vpcmpq(kA, ymmB, ymmC, 0);
  e->vpcmpq(kA, ymmB, anyptr_gpC, 0);
  e->vpcmpq(kA, zmmB, zmmC, 0);
  e->vpcmpq(kA, zmmB, anyptr_gpC, 0);
  e->vpcmpub(kA, xmmB, xmmC, 0);
  e->vpcmpub(kA, xmmB, anyptr_gpC, 0);
  e->vpcmpub(kA, ymmB, ymmC, 0);
  e->vpcmpub(kA, ymmB, anyptr_gpC, 0);
  e->vpcmpub(kA, zmmB, zmmC, 0);
  e->vpcmpub(kA, zmmB, anyptr_gpC, 0);
  e->vpcmpud(kA, xmmB, xmmC, 0);
  e->vpcmpud(kA, xmmB, anyptr_gpC, 0);
  e->vpcmpud(kA, ymmB, ymmC, 0);
  e->vpcmpud(kA, ymmB, anyptr_gpC, 0);
  e->vpcmpud(kA, zmmB, zmmC, 0);
  e->vpcmpud(kA, zmmB, anyptr_gpC, 0);
  e->vpcmpuq(kA, xmmB, xmmC, 0);
  e->vpcmpuq(kA, xmmB, anyptr_gpC, 0);
  e->vpcmpuq(kA, ymmB, ymmC, 0);
  e->vpcmpuq(kA, ymmB, anyptr_gpC, 0);
  e->vpcmpuq(kA, zmmB, zmmC, 0);
  e->vpcmpuq(kA, zmmB, anyptr_gpC, 0);
  e->vpcmpuw(kA, xmmB, xmmC, 0);
  e->vpcmpuw(kA, xmmB, anyptr_gpC, 0);
  e->vpcmpuw(kA, ymmB, ymmC, 0);
  e->vpcmpuw(kA, ymmB, anyptr_gpC, 0);
  e->vpcmpuw(kA, zmmB, zmmC, 0);
  e->vpcmpuw(kA, zmmB, anyptr_gpC, 0);
  e->vpcmpw(kA, xmmB, xmmC, 0);
  e->vpcmpw(kA, xmmB, anyptr_gpC, 0);
  e->vpcmpw(kA, ymmB, ymmC, 0);
  e->vpcmpw(kA, ymmB, anyptr_gpC, 0);
  e->vpcmpw(kA, zmmB, zmmC, 0);
  e->vpcmpw(kA, zmmB, anyptr_gpC, 0);
  e->vpcompressd(xmmA, xmmB);
  e->vpcompressd(anyptr_gpA, xmmB);
  e->vpcompressd(ymmA, ymmB);
  e->vpcompressd(anyptr_gpA, ymmB);
  e->vpcompressd(zmmA, zmmB);
  e->vpcompressd(anyptr_gpA, zmmB);
  e->vpcompressq(xmmA, xmmB);
  e->vpcompressq(anyptr_gpA, xmmB);
  e->vpcompressq(ymmA, ymmB);
  e->vpcompressq(anyptr_gpA, ymmB);
  e->vpcompressq(zmmA, zmmB);
  e->vpcompressq(anyptr_gpA, zmmB);
  e->vpconflictd(xmmA, xmmB);
  e->vpconflictd(xmmA, anyptr_gpB);
  e->vpconflictd(ymmA, ymmB);
  e->vpconflictd(ymmA, anyptr_gpB);
  e->vpconflictd(zmmA, zmmB);
  e->vpconflictd(zmmA, anyptr_gpB);
  e->vpconflictq(xmmA, xmmB);
  e->vpconflictq(xmmA, anyptr_gpB);
  e->vpconflictq(ymmA, ymmB);
  e->vpconflictq(ymmA, anyptr_gpB);
  e->vpconflictq(zmmA, zmmB);
  e->vpconflictq(zmmA, anyptr_gpB);
  e->vpermb(xmmA, xmmB, xmmC);
  e->vpermb(xmmA, xmmB, anyptr_gpC);
  e->vpermb(ymmA, ymmB, ymmC);
  e->vpermb(ymmA, ymmB, anyptr_gpC);
  e->vpermb(zmmA, zmmB, zmmC);
  e->vpermb(zmmA, zmmB, anyptr_gpC);
  e->vpermd(ymmA, ymmB, ymmC);
  e->vpermd(ymmA, ymmB, anyptr_gpC);
  e->vpermd(zmmA, zmmB, zmmC);
  e->vpermd(zmmA, zmmB, anyptr_gpC);
  e->vpermi2b(xmmA, xmmB, xmmC);
  e->vpermi2b(xmmA, xmmB, anyptr_gpC);
  e->vpermi2b(ymmA, ymmB, ymmC);
  e->vpermi2b(ymmA, ymmB, anyptr_gpC);
  e->vpermi2b(zmmA, zmmB, zmmC);
  e->vpermi2b(zmmA, zmmB, anyptr_gpC);
  e->vpermi2d(xmmA, xmmB, xmmC);
  e->vpermi2d(xmmA, xmmB, anyptr_gpC);
  e->vpermi2d(ymmA, ymmB, ymmC);
  e->vpermi2d(ymmA, ymmB, anyptr_gpC);
  e->vpermi2d(zmmA, zmmB, zmmC);
  e->vpermi2d(zmmA, zmmB, anyptr_gpC);
  e->vpermi2pd(xmmA, xmmB, xmmC);
  e->vpermi2pd(xmmA, xmmB, anyptr_gpC);
  e->vpermi2pd(ymmA, ymmB, ymmC);
  e->vpermi2pd(ymmA, ymmB, anyptr_gpC);
  e->vpermi2pd(zmmA, zmmB, zmmC);
  e->vpermi2pd(zmmA, zmmB, anyptr_gpC);
  e->vpermi2ps(xmmA, xmmB, xmmC);
  e->vpermi2ps(xmmA, xmmB, anyptr_gpC);
  e->vpermi2ps(ymmA, ymmB, ymmC);
  e->vpermi2ps(ymmA, ymmB, anyptr_gpC);
  e->vpermi2ps(zmmA, zmmB, zmmC);
  e->vpermi2ps(zmmA, zmmB, anyptr_gpC);
  e->vpermi2q(xmmA, xmmB, xmmC);
  e->vpermi2q(xmmA, xmmB, anyptr_gpC);
  e->vpermi2q(ymmA, ymmB, ymmC);
  e->vpermi2q(ymmA, ymmB, anyptr_gpC);
  e->vpermi2q(zmmA, zmmB, zmmC);
  e->vpermi2q(zmmA, zmmB, anyptr_gpC);
  e->vpermi2w(xmmA, xmmB, xmmC);
  e->vpermi2w(xmmA, xmmB, anyptr_gpC);
  e->vpermi2w(ymmA, ymmB, ymmC);
  e->vpermi2w(ymmA, ymmB, anyptr_gpC);
  e->vpermi2w(zmmA, zmmB, zmmC);
  e->vpermi2w(zmmA, zmmB, anyptr_gpC);
  e->vpermilpd(xmmA, xmmB, xmmC);
  e->vpermilpd(xmmA, xmmB, anyptr_gpC);
  e->vpermilpd(ymmA, ymmB, ymmC);
  e->vpermilpd(ymmA, ymmB, anyptr_gpC);
  e->vpermilpd(zmmA, zmmB, zmmC);
  e->vpermilpd(zmmA, zmmB, anyptr_gpC);
  e->vpermilpd(xmmA, xmmB, 0);
  e->vpermilpd(xmmA, anyptr_gpB, 0);
  e->vpermilpd(ymmA, ymmB, 0);
  e->vpermilpd(ymmA, anyptr_gpB, 0);
  e->vpermilpd(zmmA, zmmB, 0);
  e->vpermilpd(zmmA, anyptr_gpB, 0);
  e->vpermilps(xmmA, xmmB, xmmC);
  e->vpermilps(xmmA, xmmB, anyptr_gpC);
  e->vpermilps(ymmA, ymmB, ymmC);
  e->vpermilps(ymmA, ymmB, anyptr_gpC);
  e->vpermilps(zmmA, zmmB, zmmC);
  e->vpermilps(zmmA, zmmB, anyptr_gpC);
  e->vpermilps(xmmA, xmmB, 0);
  e->vpermilps(xmmA, anyptr_gpB, 0);
  e->vpermilps(ymmA, ymmB, 0);
  e->vpermilps(ymmA, anyptr_gpB, 0);
  e->vpermilps(zmmA, zmmB, 0);
  e->vpermilps(zmmA, anyptr_gpB, 0);
  e->vpermq(ymmA, ymmB, ymmC);
  e->vpermq(ymmA, ymmB, anyptr_gpC);
  e->vpermq(zmmA, zmmB, zmmC);
  e->vpermq(zmmA, zmmB, anyptr_gpC);
  e->vpermq(ymmA, ymmB, 0);
  e->vpermq(ymmA, anyptr_gpB, 0);
  e->vpermq(zmmA, zmmB, 0);
  e->vpermq(zmmA, anyptr_gpB, 0);
  e->vpermt2b(xmmA, xmmB, xmmC);
  e->vpermt2b(xmmA, xmmB, anyptr_gpC);
  e->vpermt2b(ymmA, ymmB, ymmC);
  e->vpermt2b(ymmA, ymmB, anyptr_gpC);
  e->vpermt2b(zmmA, zmmB, zmmC);
  e->vpermt2b(zmmA, zmmB, anyptr_gpC);
  e->vpermt2d(xmmA, xmmB, xmmC);
  e->vpermt2d(xmmA, xmmB, anyptr_gpC);
  e->vpermt2d(ymmA, ymmB, ymmC);
  e->vpermt2d(ymmA, ymmB, anyptr_gpC);
  e->vpermt2d(zmmA, zmmB, zmmC);
  e->vpermt2d(zmmA, zmmB, anyptr_gpC);
  e->vpermt2pd(xmmA, xmmB, xmmC);
  e->vpermt2pd(xmmA, xmmB, anyptr_gpC);
  e->vpermt2pd(ymmA, ymmB, ymmC);
  e->vpermt2pd(ymmA, ymmB, anyptr_gpC);
  e->vpermt2pd(zmmA, zmmB, zmmC);
  e->vpermt2pd(zmmA, zmmB, anyptr_gpC);
  e->vpermt2ps(xmmA, xmmB, xmmC);
  e->vpermt2ps(xmmA, xmmB, anyptr_gpC);
  e->vpermt2ps(ymmA, ymmB, ymmC);
  e->vpermt2ps(ymmA, ymmB, anyptr_gpC);
  e->vpermt2ps(zmmA, zmmB, zmmC);
  e->vpermt2ps(zmmA, zmmB, anyptr_gpC);
  e->vpermt2q(xmmA, xmmB, xmmC);
  e->vpermt2q(xmmA, xmmB, anyptr_gpC);
  e->vpermt2q(ymmA, ymmB, ymmC);
  e->vpermt2q(ymmA, ymmB, anyptr_gpC);
  e->vpermt2q(zmmA, zmmB, zmmC);
  e->vpermt2q(zmmA, zmmB, anyptr_gpC);
  e->vpermt2w(xmmA, xmmB, xmmC);
  e->vpermt2w(xmmA, xmmB, anyptr_gpC);
  e->vpermt2w(ymmA, ymmB, ymmC);
  e->vpermt2w(ymmA, ymmB, anyptr_gpC);
  e->vpermt2w(zmmA, zmmB, zmmC);
  e->vpermt2w(zmmA, zmmB, anyptr_gpC);
  e->vpermw(xmmA, xmmB, xmmC);
  e->vpermw(xmmA, xmmB, anyptr_gpC);
  e->vpermw(ymmA, ymmB, ymmC);
  e->vpermw(ymmA, ymmB, anyptr_gpC);
  e->vpermw(zmmA, zmmB, zmmC);
  e->vpermw(zmmA, zmmB, anyptr_gpC);
  e->vpexpandd(xmmA, xmmB);
  e->vpexpandd(xmmA, anyptr_gpB);
  e->vpexpandd(ymmA, ymmB);
  e->vpexpandd(ymmA, anyptr_gpB);
  e->vpexpandd(zmmA, zmmB);
  e->vpexpandd(zmmA, anyptr_gpB);
  e->vpexpandq(xmmA, xmmB);
  e->vpexpandq(xmmA, anyptr_gpB);
  e->vpexpandq(ymmA, ymmB);
  e->vpexpandq(ymmA, anyptr_gpB);
  e->vpexpandq(zmmA, zmmB);
  e->vpexpandq(zmmA, anyptr_gpB);
  // VPEXTR{B|D|Q|W} - extract from `xmmB` with an immediate of 0 into a `gd`
  // register, a `gz` register or memory.
  e->vpextrb(gdA, xmmB, 0);
  e->vpextrb(anyptr_gpA, xmmB, 0);
  e->vpextrb(gzA, xmmB, 0);
  e->vpextrd(gdA, xmmB, 0);
  e->vpextrd(anyptr_gpA, xmmB, 0);
  // The `gzA` destination of VPEXTRD and VPEXTRQ is emitted only when the emitter
  // is 64-bit (`isX64`); VPEXTRQ to memory is emitted unconditionally.
  if (isX64) e->vpextrd(gzA, xmmB, 0);
  if (isX64) e->vpextrq(gzA, xmmB, 0);
  e->vpextrq(anyptr_gpA, xmmB, 0);
  // VPEXTRW lists the `gdA` and `gzA` calls twice.
  // NOTE(review): presumably one pair per encoding of the instruction (register-only
  // form vs. register/memory form) - confirm.
  e->vpextrw(gdA, xmmB, 0);
  e->vpextrw(gzA, xmmB, 0);
  e->vpextrw(gdA, xmmB, 0);
  e->vpextrw(anyptr_gpA, xmmB, 0);
  e->vpextrw(gzA, xmmB, 0);
  e->vpgatherdd(xmmA, vx_ptr);
  e->vpgatherdd(ymmA, vy_ptr);
  e->vpgatherdd(zmmA, vz_ptr);
  e->vpgatherdq(xmmA, vx_ptr);
  e->vpgatherdq(ymmA, vy_ptr);
  e->vpgatherdq(zmmA, vz_ptr);
  e->vpgatherqd(xmmA, vx_ptr);
  e->vpgatherqd(ymmA, vy_ptr);
  e->vpgatherqd(zmmA, vz_ptr);
  e->vpgatherqq(xmmA, vx_ptr);
  e->vpgatherqq(ymmA, vy_ptr);
  e->vpgatherqq(zmmA, vz_ptr);
  // VPINSR{B|D|Q|W} - insert into `xmmA` from `xmmB` plus a `gd` register, a `gz`
  // register or memory, with an immediate of 0.
  e->vpinsrb(xmmA, xmmB, gdC, 0);
  e->vpinsrb(xmmA, xmmB, anyptr_gpC, 0);
  e->vpinsrb(xmmA, xmmB, gzC, 0);
  e->vpinsrd(xmmA, xmmB, gdC, 0);
  e->vpinsrd(xmmA, xmmB, anyptr_gpC, 0);
  e->vpinsrd(xmmA, xmmB, gzC, 0);
  // Only the VPINSRQ register form is restricted to 64-bit emitters (`isX64`);
  // its memory form is emitted unconditionally.
  if (isX64) e->vpinsrq(xmmA, xmmB, gzC, 0);
  e->vpinsrq(xmmA, xmmB, anyptr_gpC, 0);
  e->vpinsrw(xmmA, xmmB, gdC, 0);
  e->vpinsrw(xmmA, xmmB, anyptr_gpC, 0);
  e->vpinsrw(xmmA, xmmB, gzC, 0);
  e->vplzcntd(xmmA, xmmB);
  e->vplzcntd(xmmA, anyptr_gpB);
  e->vplzcntd(ymmA, ymmB);
  e->vplzcntd(ymmA, anyptr_gpB);
  e->vplzcntd(zmmA, zmmB);
  e->vplzcntd(zmmA, anyptr_gpB);
  e->vplzcntq(xmmA, xmmB);
  e->vplzcntq(xmmA, anyptr_gpB);
  e->vplzcntq(ymmA, ymmB);
  e->vplzcntq(ymmA, anyptr_gpB);
  e->vplzcntq(zmmA, zmmB);
  e->vplzcntq(zmmA, anyptr_gpB);
  e->vpmadd52huq(xmmA, xmmB, xmmC);
  e->vpmadd52huq(xmmA, xmmB, anyptr_gpC);
  e->vpmadd52huq(ymmA, ymmB, ymmC);
  e->vpmadd52huq(ymmA, ymmB, anyptr_gpC);
  e->vpmadd52huq(zmmA, zmmB, zmmC);
  e->vpmadd52huq(zmmA, zmmB, anyptr_gpC);
  e->vpmadd52luq(xmmA, xmmB, xmmC);
  e->vpmadd52luq(xmmA, xmmB, anyptr_gpC);
  e->vpmadd52luq(ymmA, ymmB, ymmC);
  e->vpmadd52luq(ymmA, ymmB, anyptr_gpC);
  e->vpmadd52luq(zmmA, zmmB, zmmC);
  e->vpmadd52luq(zmmA, zmmB, anyptr_gpC);
  e->vpmaddubsw(xmmA, xmmB, xmmC);
  e->vpmaddubsw(xmmA, xmmB, anyptr_gpC);
  e->vpmaddubsw(ymmA, ymmB, ymmC);
  e->vpmaddubsw(ymmA, ymmB, anyptr_gpC);
  e->vpmaddubsw(zmmA, zmmB, zmmC);
  e->vpmaddubsw(zmmA, zmmB, anyptr_gpC);
  e->vpmaddwd(xmmA, xmmB, xmmC);
  e->vpmaddwd(xmmA, xmmB, anyptr_gpC);
  e->vpmaddwd(ymmA, ymmB, ymmC);
  e->vpmaddwd(ymmA, ymmB, anyptr_gpC);
  e->vpmaddwd(zmmA, zmmB, zmmC);
  e->vpmaddwd(zmmA, zmmB, anyptr_gpC);
  e->vpmaxsb(xmmA, xmmB, xmmC);
  e->vpmaxsb(xmmA, xmmB, anyptr_gpC);
  e->vpmaxsb(ymmA, ymmB, ymmC);
  e->vpmaxsb(ymmA, ymmB, anyptr_gpC);
  e->vpmaxsb(zmmA, zmmB, zmmC);
  e->vpmaxsb(zmmA, zmmB, anyptr_gpC);
  e->vpmaxsd(xmmA, xmmB, xmmC);
  e->vpmaxsd(xmmA, xmmB, anyptr_gpC);
  e->vpmaxsd(ymmA, ymmB, ymmC);
  e->vpmaxsd(ymmA, ymmB, anyptr_gpC);
  e->vpmaxsd(zmmA, zmmB, zmmC);
  e->vpmaxsd(zmmA, zmmB, anyptr_gpC);
  e->vpmaxsq(xmmA, xmmB, xmmC);
  e->vpmaxsq(xmmA, xmmB, anyptr_gpC);
  e->vpmaxsq(ymmA, ymmB, ymmC);
  e->vpmaxsq(ymmA, ymmB, anyptr_gpC);
  e->vpmaxsq(zmmA, zmmB, zmmC);
  e->vpmaxsq(zmmA, zmmB, anyptr_gpC);
  e->vpmaxsw(xmmA, xmmB, xmmC);
  e->vpmaxsw(xmmA, xmmB, anyptr_gpC);
  e->vpmaxsw(ymmA, ymmB, ymmC);
  e->vpmaxsw(ymmA, ymmB, anyptr_gpC);
  e->vpmaxsw(zmmA, zmmB, zmmC);
  e->vpmaxsw(zmmA, zmmB, anyptr_gpC);
  e->vpmaxub(xmmA, xmmB, xmmC);
  e->vpmaxub(xmmA, xmmB, anyptr_gpC);
  e->vpmaxub(ymmA, ymmB, ymmC);
  e->vpmaxub(ymmA, ymmB, anyptr_gpC);
  e->vpmaxub(zmmA, zmmB, zmmC);
  e->vpmaxub(zmmA, zmmB, anyptr_gpC);
  e->vpmaxud(xmmA, xmmB, xmmC);
  e->vpmaxud(xmmA, xmmB, anyptr_gpC);
  e->vpmaxud(ymmA, ymmB, ymmC);
  e->vpmaxud(ymmA, ymmB, anyptr_gpC);
  e->vpmaxud(zmmA, zmmB, zmmC);
  e->vpmaxud(zmmA, zmmB, anyptr_gpC);
  e->vpmaxuq(xmmA, xmmB, xmmC);
  e->vpmaxuq(xmmA, xmmB, anyptr_gpC);
  e->vpmaxuq(ymmA, ymmB, ymmC);
  e->vpmaxuq(ymmA, ymmB, anyptr_gpC);
  e->vpmaxuq(zmmA, zmmB, zmmC);
  e->vpmaxuq(zmmA, zmmB, anyptr_gpC);
  e->vpmaxuw(xmmA, xmmB, xmmC);
  e->vpmaxuw(xmmA, xmmB, anyptr_gpC);
  e->vpmaxuw(ymmA, ymmB, ymmC);
  e->vpmaxuw(ymmA, ymmB, anyptr_gpC);
  e->vpmaxuw(zmmA, zmmB, zmmC);
  e->vpmaxuw(zmmA, zmmB, anyptr_gpC);
  e->vpminsb(xmmA, xmmB, xmmC);
  e->vpminsb(xmmA, xmmB, anyptr_gpC);
  e->vpminsb(ymmA, ymmB, ymmC);
  e->vpminsb(ymmA, ymmB, anyptr_gpC);
  e->vpminsb(zmmA, zmmB, zmmC);
  e->vpminsb(zmmA, zmmB, anyptr_gpC);
  e->vpminsd(xmmA, xmmB, xmmC);
  e->vpminsd(xmmA, xmmB, anyptr_gpC);
  e->vpminsd(ymmA, ymmB, ymmC);
  e->vpminsd(ymmA, ymmB, anyptr_gpC);
  e->vpminsd(zmmA, zmmB, zmmC);
  e->vpminsd(zmmA, zmmB, anyptr_gpC);
  e->vpminsq(xmmA, xmmB, xmmC);
  e->vpminsq(xmmA, xmmB, anyptr_gpC);
  e->vpminsq(ymmA, ymmB, ymmC);
  e->vpminsq(ymmA, ymmB, anyptr_gpC);
  e->vpminsq(zmmA, zmmB, zmmC);
  e->vpminsq(zmmA, zmmB, anyptr_gpC);
  e->vpminsw(xmmA, xmmB, xmmC);
  e->vpminsw(xmmA, xmmB, anyptr_gpC);
  e->vpminsw(ymmA, ymmB, ymmC);
  e->vpminsw(ymmA, ymmB, anyptr_gpC);
  e->vpminsw(zmmA, zmmB, zmmC);
  e->vpminsw(zmmA, zmmB, anyptr_gpC);
  e->vpminub(xmmA, xmmB, xmmC);
  e->vpminub(xmmA, xmmB, anyptr_gpC);
  e->vpminub(ymmA, ymmB, ymmC);
  e->vpminub(ymmA, ymmB, anyptr_gpC);
  e->vpminub(zmmA, zmmB, zmmC);
  e->vpminub(zmmA, zmmB, anyptr_gpC);
  e->vpminud(xmmA, xmmB, xmmC);
  e->vpminud(xmmA, xmmB, anyptr_gpC);
  e->vpminud(ymmA, ymmB, ymmC);
  e->vpminud(ymmA, ymmB, anyptr_gpC);
  e->vpminud(zmmA, zmmB, zmmC);
  e->vpminud(zmmA, zmmB, anyptr_gpC);
  e->vpminuq(xmmA, xmmB, xmmC);
  e->vpminuq(xmmA, xmmB, anyptr_gpC);
  e->vpminuq(ymmA, ymmB, ymmC);
  e->vpminuq(ymmA, ymmB, anyptr_gpC);
  e->vpminuq(zmmA, zmmB, zmmC);
  e->vpminuq(zmmA, zmmB, anyptr_gpC);
  e->vpminuw(xmmA, xmmB, xmmC);
  e->vpminuw(xmmA, xmmB, anyptr_gpC);
  e->vpminuw(ymmA, ymmB, ymmC);
  e->vpminuw(ymmA, ymmB, anyptr_gpC);
  e->vpminuw(zmmA, zmmB, zmmC);
  e->vpminuw(zmmA, zmmB, anyptr_gpC);
  e->vpmovb2m(kA, xmmB);
  e->vpmovb2m(kA, ymmB);
  e->vpmovb2m(kA, zmmB);
  e->vpmovd2m(kA, xmmB);
  e->vpmovd2m(kA, ymmB);
  e->vpmovd2m(kA, zmmB);
  e->vpmovdb(xmmA, xmmB);
  e->vpmovdb(anyptr_gpA, xmmB);
  e->vpmovdb(xmmA, ymmB);
  e->vpmovdb(anyptr_gpA, ymmB);
  e->vpmovdb(xmmA, zmmB);
  e->vpmovdb(anyptr_gpA, zmmB);
  e->vpmovdw(xmmA, xmmB);
  e->vpmovdw(anyptr_gpA, xmmB);
  e->vpmovdw(xmmA, ymmB);
  e->vpmovdw(anyptr_gpA, ymmB);
  e->vpmovdw(ymmA, zmmB);
  e->vpmovdw(anyptr_gpA, zmmB);
  e->vpmovm2b(xmmA, kB);
  e->vpmovm2b(ymmA, kB);
  e->vpmovm2b(zmmA, kB);
  e->vpmovm2d(xmmA, kB);
  e->vpmovm2d(ymmA, kB);
  e->vpmovm2d(zmmA, kB);
  e->vpmovm2q(xmmA, kB);
  e->vpmovm2q(ymmA, kB);
  e->vpmovm2q(zmmA, kB);
  e->vpmovm2w(xmmA, kB);
  e->vpmovm2w(ymmA, kB);
  e->vpmovm2w(zmmA, kB);
  e->vpmovq2m(kA, xmmB);
  e->vpmovq2m(kA, ymmB);
  e->vpmovq2m(kA, zmmB);
  e->vpmovqb(xmmA, xmmB);
  e->vpmovqb(anyptr_gpA, xmmB);
  e->vpmovqb(xmmA, ymmB);
  e->vpmovqb(anyptr_gpA, ymmB);
  e->vpmovqb(xmmA, zmmB);
  e->vpmovqb(anyptr_gpA, zmmB);
  e->vpmovqd(xmmA, xmmB);
  e->vpmovqd(anyptr_gpA, xmmB);
  e->vpmovqd(xmmA, ymmB);
  e->vpmovqd(anyptr_gpA, ymmB);
  e->vpmovqd(ymmA, zmmB);
  e->vpmovqd(anyptr_gpA, zmmB);
  e->vpmovqw(xmmA, xmmB);
  e->vpmovqw(anyptr_gpA, xmmB);
  e->vpmovqw(xmmA, ymmB);
  e->vpmovqw(anyptr_gpA, ymmB);
  e->vpmovqw(xmmA, zmmB);
  e->vpmovqw(anyptr_gpA, zmmB);
  e->vpmovsdb(xmmA, xmmB);
  e->vpmovsdb(anyptr_gpA, xmmB);
  e->vpmovsdb(xmmA, ymmB);
  e->vpmovsdb(anyptr_gpA, ymmB);
  e->vpmovsdb(xmmA, zmmB);
  e->vpmovsdb(anyptr_gpA, zmmB);
  e->vpmovsdw(xmmA, xmmB);
  e->vpmovsdw(anyptr_gpA, xmmB);
  e->vpmovsdw(xmmA, ymmB);
  e->vpmovsdw(anyptr_gpA, ymmB);
  e->vpmovsdw(ymmA, zmmB);
  e->vpmovsdw(anyptr_gpA, zmmB);
  e->vpmovsqb(xmmA, xmmB);
  e->vpmovsqb(anyptr_gpA, xmmB);
  e->vpmovsqb(xmmA, ymmB);
  e->vpmovsqb(anyptr_gpA, ymmB);
  e->vpmovsqb(xmmA, zmmB);
  e->vpmovsqb(anyptr_gpA, zmmB);
  e->vpmovsqd(xmmA, xmmB);
  e->vpmovsqd(anyptr_gpA, xmmB);
  e->vpmovsqd(xmmA, ymmB);
  e->vpmovsqd(anyptr_gpA, ymmB);
  e->vpmovsqd(ymmA, zmmB);
  e->vpmovsqd(anyptr_gpA, zmmB);
  e->vpmovsqw(xmmA, xmmB);
  e->vpmovsqw(anyptr_gpA, xmmB);
  e->vpmovsqw(xmmA, ymmB);
  e->vpmovsqw(anyptr_gpA, ymmB);
  e->vpmovsqw(xmmA, zmmB);
  e->vpmovsqw(anyptr_gpA, zmmB);
  e->vpmovswb(xmmA, xmmB);
  e->vpmovswb(anyptr_gpA, xmmB);
  e->vpmovswb(xmmA, ymmB);
  e->vpmovswb(anyptr_gpA, ymmB);
  e->vpmovswb(ymmA, zmmB);
  e->vpmovswb(anyptr_gpA, zmmB);
  e->vpmovsxbd(xmmA, xmmB);
  e->vpmovsxbd(xmmA, anyptr_gpB);
  e->vpmovsxbd(ymmA, xmmB);
  e->vpmovsxbd(ymmA, anyptr_gpB);
  e->vpmovsxbd(zmmA, xmmB);
  e->vpmovsxbd(zmmA, anyptr_gpB);
  e->vpmovsxbq(xmmA, xmmB);
  e->vpmovsxbq(xmmA, anyptr_gpB);
  e->vpmovsxbq(ymmA, xmmB);
  e->vpmovsxbq(ymmA, anyptr_gpB);
  e->vpmovsxbq(zmmA, xmmB);
  e->vpmovsxbq(zmmA, anyptr_gpB);
  e->vpmovsxbw(xmmA, xmmB);
  e->vpmovsxbw(xmmA, anyptr_gpB);
  e->vpmovsxbw(ymmA, xmmB);
  e->vpmovsxbw(ymmA, anyptr_gpB);
  e->vpmovsxbw(zmmA, ymmB);
  e->vpmovsxbw(zmmA, anyptr_gpB);
  e->vpmovsxdq(xmmA, xmmB);
  e->vpmovsxdq(xmmA, anyptr_gpB);
  e->vpmovsxdq(ymmA, xmmB);
  e->vpmovsxdq(ymmA, anyptr_gpB);
  e->vpmovsxdq(zmmA, ymmB);
  e->vpmovsxdq(zmmA, anyptr_gpB);
  e->vpmovsxwd(xmmA, xmmB);
  e->vpmovsxwd(xmmA, anyptr_gpB);
  e->vpmovsxwd(ymmA, xmmB);
  e->vpmovsxwd(ymmA, anyptr_gpB);
  e->vpmovsxwd(zmmA, ymmB);
  e->vpmovsxwd(zmmA, anyptr_gpB);
  e->vpmovsxwq(xmmA, xmmB);
  e->vpmovsxwq(xmmA, anyptr_gpB);
  e->vpmovsxwq(ymmA, xmmB);
  e->vpmovsxwq(ymmA, anyptr_gpB);
  e->vpmovsxwq(zmmA, xmmB);
  e->vpmovsxwq(zmmA, anyptr_gpB);
  e->vpmovusdb(xmmA, xmmB);
  e->vpmovusdb(anyptr_gpA, xmmB);
  e->vpmovusdb(xmmA, ymmB);
  e->vpmovusdb(anyptr_gpA, ymmB);
  e->vpmovusdb(xmmA, zmmB);
  e->vpmovusdb(anyptr_gpA, zmmB);
  e->vpmovusdw(xmmA, xmmB);
  e->vpmovusdw(anyptr_gpA, xmmB);
  e->vpmovusdw(xmmA, ymmB);
  e->vpmovusdw(anyptr_gpA, ymmB);
  e->vpmovusdw(ymmA, zmmB);
  e->vpmovusdw(anyptr_gpA, zmmB);
  e->vpmovusqb(xmmA, xmmB);
  e->vpmovusqb(anyptr_gpA, xmmB);
  e->vpmovusqb(xmmA, ymmB);
  e->vpmovusqb(anyptr_gpA, ymmB);
  e->vpmovusqb(xmmA, zmmB);
  e->vpmovusqb(anyptr_gpA, zmmB);
  e->vpmovusqd(xmmA, xmmB);
  e->vpmovusqd(anyptr_gpA, xmmB);
  e->vpmovusqd(xmmA, ymmB);
  e->vpmovusqd(anyptr_gpA, ymmB);
  e->vpmovusqd(ymmA, zmmB);
  e->vpmovusqd(anyptr_gpA, zmmB);
  e->vpmovusqw(xmmA, xmmB);
  e->vpmovusqw(anyptr_gpA, xmmB);
  e->vpmovusqw(xmmA, ymmB);
  e->vpmovusqw(anyptr_gpA, ymmB);
  e->vpmovusqw(xmmA, zmmB);
  e->vpmovusqw(anyptr_gpA, zmmB);
  e->vpmovuswb(xmmA, xmmB);
  e->vpmovuswb(anyptr_gpA, xmmB);
  e->vpmovuswb(xmmA, ymmB);
  e->vpmovuswb(anyptr_gpA, ymmB);
  e->vpmovuswb(ymmA, zmmB);
  e->vpmovuswb(anyptr_gpA, zmmB);
  e->vpmovw2m(kA, xmmB);
  e->vpmovw2m(kA, ymmB);
  e->vpmovw2m(kA, zmmB);
  e->vpmovwb(xmmA, xmmB);
  e->vpmovwb(anyptr_gpA, xmmB);
  e->vpmovwb(xmmA, ymmB);
  e->vpmovwb(anyptr_gpA, ymmB);
  e->vpmovwb(ymmA, zmmB);
  e->vpmovwb(anyptr_gpA, zmmB);
  e->vpmovzxbd(xmmA, xmmB);
  e->vpmovzxbd(xmmA, anyptr_gpB);
  e->vpmovzxbd(ymmA, xmmB);
  e->vpmovzxbd(ymmA, anyptr_gpB);
  e->vpmovzxbd(zmmA, xmmB);
  e->vpmovzxbd(zmmA, anyptr_gpB);
  e->vpmovzxbq(xmmA, xmmB);
  e->vpmovzxbq(xmmA, anyptr_gpB);
  e->vpmovzxbq(ymmA, xmmB);
  e->vpmovzxbq(ymmA, anyptr_gpB);
  e->vpmovzxbq(zmmA, xmmB);
  e->vpmovzxbq(zmmA, anyptr_gpB);
  e->vpmovzxbw(xmmA, xmmB);
  e->vpmovzxbw(xmmA, anyptr_gpB);
  e->vpmovzxbw(ymmA, xmmB);
  e->vpmovzxbw(ymmA, anyptr_gpB);
  e->vpmovzxbw(zmmA, ymmB);
  e->vpmovzxbw(zmmA, anyptr_gpB);
  e->vpmovzxdq(xmmA, xmmB);
  e->vpmovzxdq(xmmA, anyptr_gpB);
  e->vpmovzxdq(ymmA, xmmB);
  e->vpmovzxdq(ymmA, anyptr_gpB);
  e->vpmovzxdq(zmmA, ymmB);
  e->vpmovzxdq(zmmA, anyptr_gpB);
  e->vpmovzxwd(xmmA, xmmB);
  e->vpmovzxwd(xmmA, anyptr_gpB);
  e->vpmovzxwd(ymmA, xmmB);
  e->vpmovzxwd(ymmA, anyptr_gpB);
  e->vpmovzxwd(zmmA, ymmB);
  e->vpmovzxwd(zmmA, anyptr_gpB);
  e->vpmovzxwq(xmmA, xmmB);
  e->vpmovzxwq(xmmA, anyptr_gpB);
  e->vpmovzxwq(ymmA, xmmB);
  e->vpmovzxwq(ymmA, anyptr_gpB);
  e->vpmovzxwq(zmmA, xmmB);
  e->vpmovzxwq(zmmA, anyptr_gpB);
  e->vpmuldq(xmmA, xmmB, xmmC);
  e->vpmuldq(xmmA, xmmB, anyptr_gpC);
  e->vpmuldq(ymmA, ymmB, ymmC);
  e->vpmuldq(ymmA, ymmB, anyptr_gpC);
  e->vpmuldq(zmmA, zmmB, zmmC);
  e->vpmuldq(zmmA, zmmB, anyptr_gpC);
  e->vpmulhrsw(xmmA, xmmB, xmmC);
  e->vpmulhrsw(xmmA, xmmB, anyptr_gpC);
  e->vpmulhrsw(ymmA, ymmB, ymmC);
  e->vpmulhrsw(ymmA, ymmB, anyptr_gpC);
  e->vpmulhrsw(zmmA, zmmB, zmmC);
  e->vpmulhrsw(zmmA, zmmB, anyptr_gpC);
  e->vpmulhuw(xmmA, xmmB, xmmC);
  e->vpmulhuw(xmmA, xmmB, anyptr_gpC);
  e->vpmulhuw(ymmA, ymmB, ymmC);
  e->vpmulhuw(ymmA, ymmB, anyptr_gpC);
  e->vpmulhuw(zmmA, zmmB, zmmC);
  e->vpmulhuw(zmmA, zmmB, anyptr_gpC);
  e->vpmulhw(xmmA, xmmB, xmmC);
  e->vpmulhw(xmmA, xmmB, anyptr_gpC);
  e->vpmulhw(ymmA, ymmB, ymmC);
  e->vpmulhw(ymmA, ymmB, anyptr_gpC);
  e->vpmulhw(zmmA, zmmB, zmmC);
  e->vpmulhw(zmmA, zmmB, anyptr_gpC);
  e->vpmulld(xmmA, xmmB, xmmC);
  e->vpmulld(xmmA, xmmB, anyptr_gpC);
  e->vpmulld(ymmA, ymmB, ymmC);
  e->vpmulld(ymmA, ymmB, anyptr_gpC);
  e->vpmulld(zmmA, zmmB, zmmC);
  e->vpmulld(zmmA, zmmB, anyptr_gpC);
  e->vpmullq(xmmA, xmmB, xmmC);
  e->vpmullq(xmmA, xmmB, anyptr_gpC);
  e->vpmullq(ymmA, ymmB, ymmC);
  e->vpmullq(ymmA, ymmB, anyptr_gpC);
  e->vpmullq(zmmA, zmmB, zmmC);
  e->vpmullq(zmmA, zmmB, anyptr_gpC);
  e->vpmullw(xmmA, xmmB, xmmC);
  e->vpmullw(xmmA, xmmB, anyptr_gpC);
  e->vpmullw(ymmA, ymmB, ymmC);
  e->vpmullw(ymmA, ymmB, anyptr_gpC);
  e->vpmullw(zmmA, zmmB, zmmC);
  e->vpmullw(zmmA, zmmB, anyptr_gpC);
  e->vpmultishiftqb(xmmA, xmmB, xmmC);
  e->vpmultishiftqb(xmmA, xmmB, anyptr_gpC);
  e->vpmultishiftqb(ymmA, ymmB, ymmC);
  e->vpmultishiftqb(ymmA, ymmB, anyptr_gpC);
  e->vpmultishiftqb(zmmA, zmmB, zmmC);
  e->vpmultishiftqb(zmmA, zmmB, anyptr_gpC);
  e->vpmuludq(xmmA, xmmB, xmmC);
  e->vpmuludq(xmmA, xmmB, anyptr_gpC);
  e->vpmuludq(ymmA, ymmB, ymmC);
  e->vpmuludq(ymmA, ymmB, anyptr_gpC);
  e->vpmuludq(zmmA, zmmB, zmmC);
  e->vpmuludq(zmmA, zmmB, anyptr_gpC);
  // vpopcntd/vpopcntq: only the zmm forms (register and memory source) are
  // exercised here, unlike most neighbouring instructions which also list xmm
  // and ymm forms.
  // NOTE(review): presumably the xmm/ymm forms were not supported by the
  // emitter when this list was written - confirm before adding them.
  e->vpopcntd(zmmA, zmmB);
  e->vpopcntd(zmmA, anyptr_gpB);
  e->vpopcntq(zmmA, zmmB);
  e->vpopcntq(zmmA, anyptr_gpB);
  e->vpord(xmmA, xmmB, xmmC);
  e->vpord(xmmA, xmmB, anyptr_gpC);
  e->vpord(ymmA, ymmB, ymmC);
  e->vpord(ymmA, ymmB, anyptr_gpC);
  e->vpord(zmmA, zmmB, zmmC);
  e->vpord(zmmA, zmmB, anyptr_gpC);
  e->vporq(xmmA, xmmB, xmmC);
  e->vporq(xmmA, xmmB, anyptr_gpC);
  e->vporq(ymmA, ymmB, ymmC);
  e->vporq(ymmA, ymmB, anyptr_gpC);
  e->vporq(zmmA, zmmB, zmmC);
  e->vporq(zmmA, zmmB, anyptr_gpC);
  e->vprold(xmmA, xmmB, 0);
  e->vprold(xmmA, anyptr_gpB, 0);
  e->vprold(ymmA, ymmB, 0);
  e->vprold(ymmA, anyptr_gpB, 0);
  e->vprold(zmmA, zmmB, 0);
  e->vprold(zmmA, anyptr_gpB, 0);
  e->vprolq(xmmA, xmmB, 0);
  e->vprolq(xmmA, anyptr_gpB, 0);
  e->vprolq(ymmA, ymmB, 0);
  e->vprolq(ymmA, anyptr_gpB, 0);
  e->vprolq(zmmA, zmmB, 0);
  e->vprolq(zmmA, anyptr_gpB, 0);
  e->vprolvd(xmmA, xmmB, xmmC);
  e->vprolvd(xmmA, xmmB, anyptr_gpC);
  e->vprolvd(ymmA, ymmB, ymmC);
  e->vprolvd(ymmA, ymmB, anyptr_gpC);
  e->vprolvd(zmmA, zmmB, zmmC);
  e->vprolvd(zmmA, zmmB, anyptr_gpC);
  e->vprolvq(xmmA, xmmB, xmmC);
  e->vprolvq(xmmA, xmmB, anyptr_gpC);
  e->vprolvq(ymmA, ymmB, ymmC);
  e->vprolvq(ymmA, ymmB, anyptr_gpC);
  e->vprolvq(zmmA, zmmB, zmmC);
  e->vprolvq(zmmA, zmmB, anyptr_gpC);
  e->vprord(xmmA, xmmB, 0);
  e->vprord(xmmA, anyptr_gpB, 0);
  e->vprord(ymmA, ymmB, 0);
  e->vprord(ymmA, anyptr_gpB, 0);
  e->vprord(zmmA, zmmB, 0);
  e->vprord(zmmA, anyptr_gpB, 0);
  e->vprorq(xmmA, xmmB, 0);
  e->vprorq(xmmA, anyptr_gpB, 0);
  e->vprorq(ymmA, ymmB, 0);
  e->vprorq(ymmA, anyptr_gpB, 0);
  e->vprorq(zmmA, zmmB, 0);
  e->vprorq(zmmA, anyptr_gpB, 0);
  e->vprorvd(xmmA, xmmB, xmmC);
  e->vprorvd(xmmA, xmmB, anyptr_gpC);
  e->vprorvd(ymmA, ymmB, ymmC);
  e->vprorvd(ymmA, ymmB, anyptr_gpC);
  e->vprorvd(zmmA, zmmB, zmmC);
  e->vprorvd(zmmA, zmmB, anyptr_gpC);
  e->vprorvq(xmmA, xmmB, xmmC);
  e->vprorvq(xmmA, xmmB, anyptr_gpC);
  e->vprorvq(ymmA, ymmB, ymmC);
  e->vprorvq(ymmA, ymmB, anyptr_gpC);
  e->vprorvq(zmmA, zmmB, zmmC);
  e->vprorvq(zmmA, zmmB, anyptr_gpC);
  e->vpsadbw(xmmA, xmmB, xmmC);
  e->vpsadbw(xmmA, xmmB, anyptr_gpC);
  e->vpsadbw(ymmA, ymmB, ymmC);
  e->vpsadbw(ymmA, ymmB, anyptr_gpC);
  e->vpsadbw(zmmA, zmmB, zmmC);
  e->vpsadbw(zmmA, zmmB, anyptr_gpC);
  // AVX-512 integer scatters: VSIB memory operand whose index is an xmm
  // (vx_ptr), ymm (vy_ptr) or zmm (vz_ptr) register + source data vector.
  //
  // When the index and element sizes differ, the two vectors hold the same
  // number of lanes but differ in width:
  //   - vpscatterdq (32-bit indices, 64-bit elements) uses an index vector
  //     half as wide as the data: vm32x/xmm, vm32x/ymm, vm32y/zmm - the same
  //     pairing as the vscatterdpd forms emitted by this function.
  //   - vpscatterqd (64-bit indices, 32-bit elements) uses a data vector half
  //     as wide as the index: vm64x/xmm, vm64y/xmm, vm64z/ymm.
  e->vpscatterdd(vx_ptr, xmmB);
  e->vpscatterdd(vy_ptr, ymmB);
  e->vpscatterdd(vz_ptr, zmmB);
  e->vpscatterdq(vx_ptr, xmmB);
  e->vpscatterdq(vx_ptr, ymmB);
  e->vpscatterdq(vy_ptr, zmmB);
  e->vpscatterqd(vx_ptr, xmmB);
  e->vpscatterqd(vy_ptr, xmmB);
  e->vpscatterqd(vz_ptr, ymmB);
  e->vpscatterqq(vx_ptr, xmmB);
  e->vpscatterqq(vy_ptr, ymmB);
  e->vpscatterqq(vz_ptr, zmmB);
  e->vpshufb(xmmA, xmmB, xmmC);
  e->vpshufb(xmmA, xmmB, anyptr_gpC);
  e->vpshufb(ymmA, ymmB, ymmC);
  e->vpshufb(ymmA, ymmB, anyptr_gpC);
  e->vpshufb(zmmA, zmmB, zmmC);
  e->vpshufb(zmmA, zmmB, anyptr_gpC);
  e->vpshufd(xmmA, xmmB, 0);
  e->vpshufd(xmmA, anyptr_gpB, 0);
  e->vpshufd(ymmA, ymmB, 0);
  e->vpshufd(ymmA, anyptr_gpB, 0);
  e->vpshufd(zmmA, zmmB, 0);
  e->vpshufd(zmmA, anyptr_gpB, 0);
  e->vpshufhw(xmmA, xmmB, 0);
  e->vpshufhw(xmmA, anyptr_gpB, 0);
  e->vpshufhw(ymmA, ymmB, 0);
  e->vpshufhw(ymmA, anyptr_gpB, 0);
  e->vpshufhw(zmmA, zmmB, 0);
  e->vpshufhw(zmmA, anyptr_gpB, 0);
  e->vpshuflw(xmmA, xmmB, 0);
  e->vpshuflw(xmmA, anyptr_gpB, 0);
  e->vpshuflw(ymmA, ymmB, 0);
  e->vpshuflw(ymmA, anyptr_gpB, 0);
  e->vpshuflw(zmmA, zmmB, 0);
  e->vpshuflw(zmmA, anyptr_gpB, 0);
  // vpslld: four operand shapes per vector width (xmm, ymm, zmm):
  //   1. (dst, src, xmmC)         - third operand is a vector register,
  //   2. (dst, src, anyptr_gpC)   - third operand is a memory operand,
  //   3. (dst, src, 0)            - third operand is an immediate,
  //   4. (dst, anyptr_gpB, 0)     - memory source with an immediate.
  // The register third operand stays xmmC for the ymm and zmm forms as well;
  // it does not widen with the destination.
  e->vpslld(xmmA, xmmB, xmmC);
  e->vpslld(xmmA, xmmB, anyptr_gpC);
  e->vpslld(xmmA, xmmB, 0);
  e->vpslld(xmmA, anyptr_gpB, 0);
  e->vpslld(ymmA, ymmB, xmmC);
  e->vpslld(ymmA, ymmB, anyptr_gpC);
  e->vpslld(ymmA, ymmB, 0);
  e->vpslld(ymmA, anyptr_gpB, 0);
  e->vpslld(zmmA, zmmB, xmmC);
  e->vpslld(zmmA, zmmB, anyptr_gpC);
  e->vpslld(zmmA, zmmB, 0);
  e->vpslld(zmmA, anyptr_gpB, 0);
  e->vpslldq(xmmA, xmmB, 0);
  e->vpslldq(xmmA, anyptr_gpB, 0);
  e->vpslldq(ymmA, ymmB, 0);
  e->vpslldq(ymmA, anyptr_gpB, 0);
  e->vpslldq(zmmA, zmmB, 0);
  e->vpslldq(zmmA, anyptr_gpB, 0);
  e->vpsllq(xmmA, xmmB, xmmC);
  e->vpsllq(xmmA, xmmB, anyptr_gpC);
  e->vpsllq(xmmA, xmmB, 0);
  e->vpsllq(xmmA, anyptr_gpB, 0);
  e->vpsllq(ymmA, ymmB, xmmC);
  e->vpsllq(ymmA, ymmB, anyptr_gpC);
  e->vpsllq(ymmA, ymmB, 0);
  e->vpsllq(ymmA, anyptr_gpB, 0);
  e->vpsllq(zmmA, zmmB, xmmC);
  e->vpsllq(zmmA, zmmB, anyptr_gpC);
  e->vpsllq(zmmA, zmmB, 0);
  e->vpsllq(zmmA, anyptr_gpB, 0);
  e->vpsllvd(xmmA, xmmB, xmmC);
  e->vpsllvd(xmmA, xmmB, anyptr_gpC);
  e->vpsllvd(ymmA, ymmB, ymmC);
  e->vpsllvd(ymmA, ymmB, anyptr_gpC);
  e->vpsllvd(zmmA, zmmB, zmmC);
  e->vpsllvd(zmmA, zmmB, anyptr_gpC);
  e->vpsllvq(xmmA, xmmB, xmmC);
  e->vpsllvq(xmmA, xmmB, anyptr_gpC);
  e->vpsllvq(ymmA, ymmB, ymmC);
  e->vpsllvq(ymmA, ymmB, anyptr_gpC);
  e->vpsllvq(zmmA, zmmB, zmmC);
  e->vpsllvq(zmmA, zmmB, anyptr_gpC);
  e->vpsllvw(xmmA, xmmB, xmmC);
  e->vpsllvw(xmmA, xmmB, anyptr_gpC);
  e->vpsllvw(ymmA, ymmB, ymmC);
  e->vpsllvw(ymmA, ymmB, anyptr_gpC);
  e->vpsllvw(zmmA, zmmB, zmmC);
  e->vpsllvw(zmmA, zmmB, anyptr_gpC);
  e->vpsllw(xmmA, xmmB, xmmC);
  e->vpsllw(xmmA, xmmB, anyptr_gpC);
  e->vpsllw(xmmA, xmmB, 0);
  e->vpsllw(xmmA, anyptr_gpB, 0);
  e->vpsllw(ymmA, ymmB, xmmC);
  e->vpsllw(ymmA, ymmB, anyptr_gpC);
  e->vpsllw(ymmA, ymmB, 0);
  e->vpsllw(ymmA, anyptr_gpB, 0);
  e->vpsllw(zmmA, zmmB, xmmC);
  e->vpsllw(zmmA, zmmB, anyptr_gpC);
  e->vpsllw(zmmA, zmmB, 0);
  e->vpsllw(zmmA, anyptr_gpB, 0);
  e->vpsrad(xmmA, xmmB, xmmC);
  e->vpsrad(xmmA, xmmB, anyptr_gpC);
  e->vpsrad(xmmA, xmmB, 0);
  e->vpsrad(xmmA, anyptr_gpB, 0);
  e->vpsrad(ymmA, ymmB, xmmC);
  e->vpsrad(ymmA, ymmB, anyptr_gpC);
  e->vpsrad(ymmA, ymmB, 0);
  e->vpsrad(ymmA, anyptr_gpB, 0);
  e->vpsrad(zmmA, zmmB, xmmC);
  e->vpsrad(zmmA, zmmB, anyptr_gpC);
  e->vpsrad(zmmA, zmmB, 0);
  e->vpsrad(zmmA, anyptr_gpB, 0);
  e->vpsraq(xmmA, xmmB, xmmC);
  e->vpsraq(xmmA, xmmB, anyptr_gpC);
  e->vpsraq(xmmA, xmmB, 0);
  e->vpsraq(xmmA, anyptr_gpB, 0);
  e->vpsraq(ymmA, ymmB, xmmC);
  e->vpsraq(ymmA, ymmB, anyptr_gpC);
  e->vpsraq(ymmA, ymmB, 0);
  e->vpsraq(ymmA, anyptr_gpB, 0);
  e->vpsraq(zmmA, zmmB, xmmC);
  e->vpsraq(zmmA, zmmB, anyptr_gpC);
  e->vpsraq(zmmA, zmmB, 0);
  e->vpsraq(zmmA, anyptr_gpB, 0);
  e->vpsravd(xmmA, xmmB, xmmC);
  e->vpsravd(xmmA, xmmB, anyptr_gpC);
  e->vpsravd(ymmA, ymmB, ymmC);
  e->vpsravd(ymmA, ymmB, anyptr_gpC);
  e->vpsravd(zmmA, zmmB, zmmC);
  e->vpsravd(zmmA, zmmB, anyptr_gpC);
  e->vpsravq(xmmA, xmmB, xmmC);
  e->vpsravq(xmmA, xmmB, anyptr_gpC);
  e->vpsravq(ymmA, ymmB, ymmC);
  e->vpsravq(ymmA, ymmB, anyptr_gpC);
  e->vpsravq(zmmA, zmmB, zmmC);
  e->vpsravq(zmmA, zmmB, anyptr_gpC);
  e->vpsravw(xmmA, xmmB, xmmC);
  e->vpsravw(xmmA, xmmB, anyptr_gpC);
  e->vpsravw(ymmA, ymmB, ymmC);
  e->vpsravw(ymmA, ymmB, anyptr_gpC);
  e->vpsravw(zmmA, zmmB, zmmC);
  e->vpsravw(zmmA, zmmB, anyptr_gpC);
  e->vpsraw(xmmA, xmmB, xmmC);
  e->vpsraw(xmmA, xmmB, anyptr_gpC);
  e->vpsraw(xmmA, xmmB, 0);
  e->vpsraw(xmmA, anyptr_gpB, 0);
  e->vpsraw(ymmA, ymmB, xmmC);
  e->vpsraw(ymmA, ymmB, anyptr_gpC);
  e->vpsraw(ymmA, ymmB, 0);
  e->vpsraw(ymmA, anyptr_gpB, 0);
  e->vpsraw(zmmA, zmmB, xmmC);
  e->vpsraw(zmmA, zmmB, anyptr_gpC);
  e->vpsraw(zmmA, zmmB, 0);
  e->vpsraw(zmmA, anyptr_gpB, 0);
  e->vpsrld(xmmA, xmmB, xmmC);
  e->vpsrld(xmmA, xmmB, anyptr_gpC);
  e->vpsrld(xmmA, xmmB, 0);
  e->vpsrld(xmmA, anyptr_gpB, 0);
  e->vpsrld(ymmA, ymmB, xmmC);
  e->vpsrld(ymmA, ymmB, anyptr_gpC);
  e->vpsrld(ymmA, ymmB, 0);
  e->vpsrld(ymmA, anyptr_gpB, 0);
  e->vpsrld(zmmA, zmmB, xmmC);
  e->vpsrld(zmmA, zmmB, anyptr_gpC);
  e->vpsrld(zmmA, zmmB, 0);
  e->vpsrld(zmmA, anyptr_gpB, 0);
  e->vpsrldq(xmmA, xmmB, 0);
  e->vpsrldq(xmmA, anyptr_gpB, 0);
  e->vpsrldq(ymmA, ymmB, 0);
  e->vpsrldq(ymmA, anyptr_gpB, 0);
  e->vpsrldq(zmmA, zmmB, 0);
  e->vpsrldq(zmmA, anyptr_gpB, 0);
  e->vpsrlq(xmmA, xmmB, xmmC);
  e->vpsrlq(xmmA, xmmB, anyptr_gpC);
  e->vpsrlq(xmmA, xmmB, 0);
  e->vpsrlq(xmmA, anyptr_gpB, 0);
  e->vpsrlq(ymmA, ymmB, xmmC);
  e->vpsrlq(ymmA, ymmB, anyptr_gpC);
  e->vpsrlq(ymmA, ymmB, 0);
  e->vpsrlq(ymmA, anyptr_gpB, 0);
  e->vpsrlq(zmmA, zmmB, xmmC);
  e->vpsrlq(zmmA, zmmB, anyptr_gpC);
  e->vpsrlq(zmmA, zmmB, 0);
  e->vpsrlq(zmmA, anyptr_gpB, 0);
  e->vpsrlvd(xmmA, xmmB, xmmC);
  e->vpsrlvd(xmmA, xmmB, anyptr_gpC);
  e->vpsrlvd(ymmA, ymmB, ymmC);
  e->vpsrlvd(ymmA, ymmB, anyptr_gpC);
  e->vpsrlvd(zmmA, zmmB, zmmC);
  e->vpsrlvd(zmmA, zmmB, anyptr_gpC);
  e->vpsrlvq(xmmA, xmmB, xmmC);
  e->vpsrlvq(xmmA, xmmB, anyptr_gpC);
  e->vpsrlvq(ymmA, ymmB, ymmC);
  e->vpsrlvq(ymmA, ymmB, anyptr_gpC);
  e->vpsrlvq(zmmA, zmmB, zmmC);
  e->vpsrlvq(zmmA, zmmB, anyptr_gpC);
  e->vpsrlvw(xmmA, xmmB, xmmC);
  e->vpsrlvw(xmmA, xmmB, anyptr_gpC);
  e->vpsrlvw(ymmA, ymmB, ymmC);
  e->vpsrlvw(ymmA, ymmB, anyptr_gpC);
  e->vpsrlvw(zmmA, zmmB, zmmC);
  e->vpsrlvw(zmmA, zmmB, anyptr_gpC);
  e->vpsrlw(xmmA, xmmB, xmmC);
  e->vpsrlw(xmmA, xmmB, anyptr_gpC);
  e->vpsrlw(xmmA, xmmB, 0);
  e->vpsrlw(xmmA, anyptr_gpB, 0);
  e->vpsrlw(ymmA, ymmB, xmmC);
  e->vpsrlw(ymmA, ymmB, anyptr_gpC);
  e->vpsrlw(ymmA, ymmB, 0);
  e->vpsrlw(ymmA, anyptr_gpB, 0);
  e->vpsrlw(zmmA, zmmB, xmmC);
  e->vpsrlw(zmmA, zmmB, anyptr_gpC);
  e->vpsrlw(zmmA, zmmB, 0);
  e->vpsrlw(zmmA, anyptr_gpB, 0);
  e->vpsubb(xmmA, xmmB, xmmC);
  e->vpsubb(xmmA, xmmB, anyptr_gpC);
  e->vpsubb(ymmA, ymmB, ymmC);
  e->vpsubb(ymmA, ymmB, anyptr_gpC);
  e->vpsubb(zmmA, zmmB, zmmC);
  e->vpsubb(zmmA, zmmB, anyptr_gpC);
  e->vpsubd(xmmA, xmmB, xmmC);
  e->vpsubd(xmmA, xmmB, anyptr_gpC);
  e->vpsubd(ymmA, ymmB, ymmC);
  e->vpsubd(ymmA, ymmB, anyptr_gpC);
  e->vpsubd(zmmA, zmmB, zmmC);
  e->vpsubd(zmmA, zmmB, anyptr_gpC);
  e->vpsubq(xmmA, xmmB, xmmC);
  e->vpsubq(xmmA, xmmB, anyptr_gpC);
  e->vpsubq(ymmA, ymmB, ymmC);
  e->vpsubq(ymmA, ymmB, anyptr_gpC);
  e->vpsubq(zmmA, zmmB, zmmC);
  e->vpsubq(zmmA, zmmB, anyptr_gpC);
  e->vpsubsb(xmmA, xmmB, xmmC);
  e->vpsubsb(xmmA, xmmB, anyptr_gpC);
  e->vpsubsb(ymmA, ymmB, ymmC);
  e->vpsubsb(ymmA, ymmB, anyptr_gpC);
  e->vpsubsb(zmmA, zmmB, zmmC);
  e->vpsubsb(zmmA, zmmB, anyptr_gpC);
  e->vpsubsw(xmmA, xmmB, xmmC);
  e->vpsubsw(xmmA, xmmB, anyptr_gpC);
  e->vpsubsw(ymmA, ymmB, ymmC);
  e->vpsubsw(ymmA, ymmB, anyptr_gpC);
  e->vpsubsw(zmmA, zmmB, zmmC);
  e->vpsubsw(zmmA, zmmB, anyptr_gpC);
  e->vpsubusb(xmmA, xmmB, xmmC);
  e->vpsubusb(xmmA, xmmB, anyptr_gpC);
  e->vpsubusb(ymmA, ymmB, ymmC);
  e->vpsubusb(ymmA, ymmB, anyptr_gpC);
  e->vpsubusb(zmmA, zmmB, zmmC);
  e->vpsubusb(zmmA, zmmB, anyptr_gpC);
  e->vpsubusw(xmmA, xmmB, xmmC);
  e->vpsubusw(xmmA, xmmB, anyptr_gpC);
  e->vpsubusw(ymmA, ymmB, ymmC);
  e->vpsubusw(ymmA, ymmB, anyptr_gpC);
  e->vpsubusw(zmmA, zmmB, zmmC);
  e->vpsubusw(zmmA, zmmB, anyptr_gpC);
  e->vpsubw(xmmA, xmmB, xmmC);
  e->vpsubw(xmmA, xmmB, anyptr_gpC);
  e->vpsubw(ymmA, ymmB, ymmC);
  e->vpsubw(ymmA, ymmB, anyptr_gpC);
  e->vpsubw(zmmA, zmmB, zmmC);
  e->vpsubw(zmmA, zmmB, anyptr_gpC);
  e->vpternlogd(xmmA, xmmB, xmmC, 0);
  e->vpternlogd(xmmA, xmmB, anyptr_gpC, 0);
  e->vpternlogd(ymmA, ymmB, ymmC, 0);
  e->vpternlogd(ymmA, ymmB, anyptr_gpC, 0);
  e->vpternlogd(zmmA, zmmB, zmmC, 0);
  e->vpternlogd(zmmA, zmmB, anyptr_gpC, 0);
  e->vpternlogq(xmmA, xmmB, xmmC, 0);
  e->vpternlogq(xmmA, xmmB, anyptr_gpC, 0);
  e->vpternlogq(ymmA, ymmB, ymmC, 0);
  e->vpternlogq(ymmA, ymmB, anyptr_gpC, 0);
  e->vpternlogq(zmmA, zmmB, zmmC, 0);
  e->vpternlogq(zmmA, zmmB, anyptr_gpC, 0);
  e->vptestmb(kA, xmmB, xmmC);
  e->vptestmb(kA, xmmB, anyptr_gpC);
  e->vptestmb(kA, ymmB, ymmC);
  e->vptestmb(kA, ymmB, anyptr_gpC);
  e->vptestmb(kA, zmmB, zmmC);
  e->vptestmb(kA, zmmB, anyptr_gpC);
  e->vptestmd(kA, xmmB, xmmC);
  e->vptestmd(kA, xmmB, anyptr_gpC);
  e->vptestmd(kA, ymmB, ymmC);
  e->vptestmd(kA, ymmB, anyptr_gpC);
  e->vptestmd(kA, zmmB, zmmC);
  e->vptestmd(kA, zmmB, anyptr_gpC);
  e->vptestmq(kA, xmmB, xmmC);
  e->vptestmq(kA, xmmB, anyptr_gpC);
  e->vptestmq(kA, ymmB, ymmC);
  e->vptestmq(kA, ymmB, anyptr_gpC);
  e->vptestmq(kA, zmmB, zmmC);
  e->vptestmq(kA, zmmB, anyptr_gpC);
  e->vptestmw(kA, xmmB, xmmC);
  e->vptestmw(kA, xmmB, anyptr_gpC);
  e->vptestmw(kA, ymmB, ymmC);
  e->vptestmw(kA, ymmB, anyptr_gpC);
  e->vptestmw(kA, zmmB, zmmC);
  e->vptestmw(kA, zmmB, anyptr_gpC);
  e->vptestnmb(kA, xmmB, xmmC);
  e->vptestnmb(kA, xmmB, anyptr_gpC);
  e->vptestnmb(kA, ymmB, ymmC);
  e->vptestnmb(kA, ymmB, anyptr_gpC);
  e->vptestnmb(kA, zmmB, zmmC);
  e->vptestnmb(kA, zmmB, anyptr_gpC);
  e->vptestnmd(kA, xmmB, xmmC);
  e->vptestnmd(kA, xmmB, anyptr_gpC);
  e->vptestnmd(kA, ymmB, ymmC);
  e->vptestnmd(kA, ymmB, anyptr_gpC);
  e->vptestnmd(kA, zmmB, zmmC);
  e->vptestnmd(kA, zmmB, anyptr_gpC);
  e->vptestnmq(kA, xmmB, xmmC);
  e->vptestnmq(kA, xmmB, anyptr_gpC);
  e->vptestnmq(kA, ymmB, ymmC);
  e->vptestnmq(kA, ymmB, anyptr_gpC);
  e->vptestnmq(kA, zmmB, zmmC);
  e->vptestnmq(kA, zmmB, anyptr_gpC);
  e->vptestnmw(kA, xmmB, xmmC);
  e->vptestnmw(kA, xmmB, anyptr_gpC);
  e->vptestnmw(kA, ymmB, ymmC);
  e->vptestnmw(kA, ymmB, anyptr_gpC);
  e->vptestnmw(kA, zmmB, zmmC);
  e->vptestnmw(kA, zmmB, anyptr_gpC);
  e->vpunpckhbw(xmmA, xmmB, xmmC);
  e->vpunpckhbw(xmmA, xmmB, anyptr_gpC);
  e->vpunpckhbw(ymmA, ymmB, ymmC);
  e->vpunpckhbw(ymmA, ymmB, anyptr_gpC);
  e->vpunpckhbw(zmmA, zmmB, zmmC);
  e->vpunpckhbw(zmmA, zmmB, anyptr_gpC);
  e->vpunpckhdq(xmmA, xmmB, xmmC);
  e->vpunpckhdq(xmmA, xmmB, anyptr_gpC);
  e->vpunpckhdq(ymmA, ymmB, ymmC);
  e->vpunpckhdq(ymmA, ymmB, anyptr_gpC);
  e->vpunpckhdq(zmmA, zmmB, zmmC);
  e->vpunpckhdq(zmmA, zmmB, anyptr_gpC);
  e->vpunpckhqdq(xmmA, xmmB, xmmC);
  e->vpunpckhqdq(xmmA, xmmB, anyptr_gpC);
  e->vpunpckhqdq(ymmA, ymmB, ymmC);
  e->vpunpckhqdq(ymmA, ymmB, anyptr_gpC);
  e->vpunpckhqdq(zmmA, zmmB, zmmC);
  e->vpunpckhqdq(zmmA, zmmB, anyptr_gpC);
  e->vpunpckhwd(xmmA, xmmB, xmmC);
  e->vpunpckhwd(xmmA, xmmB, anyptr_gpC);
  e->vpunpckhwd(ymmA, ymmB, ymmC);
  e->vpunpckhwd(ymmA, ymmB, anyptr_gpC);
  e->vpunpckhwd(zmmA, zmmB, zmmC);
  e->vpunpckhwd(zmmA, zmmB, anyptr_gpC);
  e->vpunpcklbw(xmmA, xmmB, xmmC);
  e->vpunpcklbw(xmmA, xmmB, anyptr_gpC);
  e->vpunpcklbw(ymmA, ymmB, ymmC);
  e->vpunpcklbw(ymmA, ymmB, anyptr_gpC);
  e->vpunpcklbw(zmmA, zmmB, zmmC);
  e->vpunpcklbw(zmmA, zmmB, anyptr_gpC);
  e->vpunpckldq(xmmA, xmmB, xmmC);
  e->vpunpckldq(xmmA, xmmB, anyptr_gpC);
  e->vpunpckldq(ymmA, ymmB, ymmC);
  e->vpunpckldq(ymmA, ymmB, anyptr_gpC);
  e->vpunpckldq(zmmA, zmmB, zmmC);
  e->vpunpckldq(zmmA, zmmB, anyptr_gpC);
  e->vpunpcklqdq(xmmA, xmmB, xmmC);
  e->vpunpcklqdq(xmmA, xmmB, anyptr_gpC);
  e->vpunpcklqdq(ymmA, ymmB, ymmC);
  e->vpunpcklqdq(ymmA, ymmB, anyptr_gpC);
  e->vpunpcklqdq(zmmA, zmmB, zmmC);
  e->vpunpcklqdq(zmmA, zmmB, anyptr_gpC);
  e->vpunpcklwd(xmmA, xmmB, xmmC);
  e->vpunpcklwd(xmmA, xmmB, anyptr_gpC);
  e->vpunpcklwd(ymmA, ymmB, ymmC);
  e->vpunpcklwd(ymmA, ymmB, anyptr_gpC);
  e->vpunpcklwd(zmmA, zmmB, zmmC);
  e->vpunpcklwd(zmmA, zmmB, anyptr_gpC);
  e->vpxord(xmmA, xmmB, xmmC);
  e->vpxord(xmmA, xmmB, anyptr_gpC);
  e->vpxord(ymmA, ymmB, ymmC);
  e->vpxord(ymmA, ymmB, anyptr_gpC);
  e->vpxord(zmmA, zmmB, zmmC);
  e->vpxord(zmmA, zmmB, anyptr_gpC);
  e->vpxorq(xmmA, xmmB, xmmC);
  e->vpxorq(xmmA, xmmB, anyptr_gpC);
  e->vpxorq(ymmA, ymmB, ymmC);
  e->vpxorq(ymmA, ymmB, anyptr_gpC);
  e->vpxorq(zmmA, zmmB, zmmC);
  e->vpxorq(zmmA, zmmB, anyptr_gpC);
  e->vrangepd(xmmA, xmmB, xmmC, 0);
  e->vrangepd(xmmA, xmmB, anyptr_gpC, 0);
  e->vrangepd(ymmA, ymmB, ymmC, 0);
  e->vrangepd(ymmA, ymmB, anyptr_gpC, 0);
  e->vrangepd(zmmA, zmmB, zmmC, 0);
  e->vrangepd(zmmA, zmmB, anyptr_gpC, 0);
  e->vrangeps(xmmA, xmmB, xmmC, 0);
  e->vrangeps(xmmA, xmmB, anyptr_gpC, 0);
  e->vrangeps(ymmA, ymmB, ymmC, 0);
  e->vrangeps(ymmA, ymmB, anyptr_gpC, 0);
  e->vrangeps(zmmA, zmmB, zmmC, 0);
  e->vrangeps(zmmA, zmmB, anyptr_gpC, 0);
  e->vrangesd(xmmA, xmmB, xmmC, 0);
  e->vrangesd(xmmA, xmmB, anyptr_gpC, 0);
  e->vrangess(xmmA, xmmB, xmmC, 0);
  e->vrangess(xmmA, xmmB, anyptr_gpC, 0);
  e->vrcp14pd(xmmA, xmmB);
  e->vrcp14pd(xmmA, anyptr_gpB);
  e->vrcp14pd(ymmA, ymmB);
  e->vrcp14pd(ymmA, anyptr_gpB);
  e->vrcp14pd(zmmA, zmmB);
  e->vrcp14pd(zmmA, anyptr_gpB);
  e->vrcp14ps(xmmA, xmmB);
  e->vrcp14ps(xmmA, anyptr_gpB);
  e->vrcp14ps(ymmA, ymmB);
  e->vrcp14ps(ymmA, anyptr_gpB);
  e->vrcp14ps(zmmA, zmmB);
  e->vrcp14ps(zmmA, anyptr_gpB);
  e->vrcp14sd(xmmA, xmmB, xmmC);
  e->vrcp14sd(xmmA, xmmB, anyptr_gpC);
  e->vrcp14ss(xmmA, xmmB, xmmC);
  e->vrcp14ss(xmmA, xmmB, anyptr_gpC);
  e->vrcp28pd(zmmA, zmmB);
  e->vrcp28pd(zmmA, anyptr_gpB);
  e->vrcp28ps(zmmA, zmmB);
  e->vrcp28ps(zmmA, anyptr_gpB);
  e->vrcp28sd(xmmA, xmmB, xmmC);
  e->vrcp28sd(xmmA, xmmB, anyptr_gpC);
  e->vrcp28ss(xmmA, xmmB, xmmC);
  e->vrcp28ss(xmmA, xmmB, anyptr_gpC);
  e->vreducepd(xmmA, xmmB, 0);
  e->vreducepd(xmmA, anyptr_gpB, 0);
  e->vreducepd(ymmA, ymmB, 0);
  e->vreducepd(ymmA, anyptr_gpB, 0);
  e->vreducepd(zmmA, zmmB, 0);
  e->vreducepd(zmmA, anyptr_gpB, 0);
  e->vreduceps(xmmA, xmmB, 0);
  e->vreduceps(xmmA, anyptr_gpB, 0);
  e->vreduceps(ymmA, ymmB, 0);
  e->vreduceps(ymmA, anyptr_gpB, 0);
  e->vreduceps(zmmA, zmmB, 0);
  e->vreduceps(zmmA, anyptr_gpB, 0);
  e->vreducesd(xmmA, xmmB, xmmC, 0);
  e->vreducesd(xmmA, xmmB, anyptr_gpC, 0);
  e->vreducess(xmmA, xmmB, xmmC, 0);
  e->vreducess(xmmA, xmmB, anyptr_gpC, 0);
  e->vrndscalepd(xmmA, xmmB, 0);
  e->vrndscalepd(xmmA, anyptr_gpB, 0);
  e->vrndscalepd(ymmA, ymmB, 0);
  e->vrndscalepd(ymmA, anyptr_gpB, 0);
  e->vrndscalepd(zmmA, zmmB, 0);
  e->vrndscalepd(zmmA, anyptr_gpB, 0);
  e->vrndscaleps(xmmA, xmmB, 0);
  e->vrndscaleps(xmmA, anyptr_gpB, 0);
  e->vrndscaleps(ymmA, ymmB, 0);
  e->vrndscaleps(ymmA, anyptr_gpB, 0);
  e->vrndscaleps(zmmA, zmmB, 0);
  e->vrndscaleps(zmmA, anyptr_gpB, 0);
  e->vrndscalesd(xmmA, xmmB, xmmC, 0);
  e->vrndscalesd(xmmA, xmmB, anyptr_gpC, 0);
  e->vrndscaless(xmmA, xmmB, xmmC, 0);
  e->vrndscaless(xmmA, xmmB, anyptr_gpC, 0);
  e->vrsqrt14pd(xmmA, xmmB);
  e->vrsqrt14pd(xmmA, anyptr_gpB);
  e->vrsqrt14pd(ymmA, ymmB);
  e->vrsqrt14pd(ymmA, anyptr_gpB);
  e->vrsqrt14pd(zmmA, zmmB);
  e->vrsqrt14pd(zmmA, anyptr_gpB);
  e->vrsqrt14ps(xmmA, xmmB);
  e->vrsqrt14ps(xmmA, anyptr_gpB);
  e->vrsqrt14ps(ymmA, ymmB);
  e->vrsqrt14ps(ymmA, anyptr_gpB);
  e->vrsqrt14ps(zmmA, zmmB);
  e->vrsqrt14ps(zmmA, anyptr_gpB);
  e->vrsqrt14sd(xmmA, xmmB, xmmC);
  e->vrsqrt14sd(xmmA, xmmB, anyptr_gpC);
  e->vrsqrt14ss(xmmA, xmmB, xmmC);
  e->vrsqrt14ss(xmmA, xmmB, anyptr_gpC);
  e->vrsqrt28pd(zmmA, zmmB);
  e->vrsqrt28pd(zmmA, anyptr_gpB);
  e->vrsqrt28ps(zmmA, zmmB);
  e->vrsqrt28ps(zmmA, anyptr_gpB);
  e->vrsqrt28sd(xmmA, xmmB, xmmC);
  e->vrsqrt28sd(xmmA, xmmB, anyptr_gpC);
  e->vrsqrt28ss(xmmA, xmmB, xmmC);
  e->vrsqrt28ss(xmmA, xmmB, anyptr_gpC);
  e->vscalefpd(xmmA, xmmB, xmmC);
  e->vscalefpd(xmmA, xmmB, anyptr_gpC);
  e->vscalefpd(ymmA, ymmB, ymmC);
  e->vscalefpd(ymmA, ymmB, anyptr_gpC);
  e->vscalefpd(zmmA, zmmB, zmmC);
  e->vscalefpd(zmmA, zmmB, anyptr_gpC);
  e->vscalefps(xmmA, xmmB, xmmC);
  e->vscalefps(xmmA, xmmB, anyptr_gpC);
  e->vscalefps(ymmA, ymmB, ymmC);
  e->vscalefps(ymmA, ymmB, anyptr_gpC);
  e->vscalefps(zmmA, zmmB, zmmC);
  e->vscalefps(zmmA, zmmB, anyptr_gpC);
  e->vscalefsd(xmmA, xmmB, xmmC);
  e->vscalefsd(xmmA, xmmB, anyptr_gpC);
  e->vscalefss(xmmA, xmmB, xmmC);
  e->vscalefss(xmmA, xmmB, anyptr_gpC);
  e->vscatterdpd(vx_ptr, xmmB);
  e->vscatterdpd(vx_ptr, ymmB);
  e->vscatterdpd(vy_ptr, zmmB);
  e->vscatterdps(vx_ptr, xmmB);
  e->vscatterdps(vy_ptr, ymmB);
  e->vscatterdps(vz_ptr, zmmB);
  e->vscatterpf0dpd(vy_ptr);
  e->vscatterpf0dps(vz_ptr);
  e->vscatterpf0qpd(vz_ptr);
  e->vscatterpf0qps(vz_ptr);
  e->vscatterpf1dpd(vy_ptr);
  e->vscatterpf1dps(vz_ptr);
  e->vscatterpf1qpd(vz_ptr);
  e->vscatterpf1qps(vz_ptr);
  e->vscatterqpd(vx_ptr, xmmB);
  e->vscatterqpd(vy_ptr, ymmB);
  e->vscatterqpd(vz_ptr, zmmB);
  e->vscatterqps(vx_ptr, xmmB);
  e->vscatterqps(vy_ptr, xmmB);
  e->vscatterqps(vz_ptr, ymmB);
  e->vshuff32x4(ymmA, ymmB, ymmC, 0);
  e->vshuff32x4(ymmA, ymmB, anyptr_gpC, 0);
  e->vshuff32x4(zmmA, zmmB, zmmC, 0);
  e->vshuff32x4(zmmA, zmmB, anyptr_gpC, 0);
  e->vshuff64x2(ymmA, ymmB, ymmC, 0);
  e->vshuff64x2(ymmA, ymmB, anyptr_gpC, 0);
  e->vshuff64x2(zmmA, zmmB, zmmC, 0);
  e->vshuff64x2(zmmA, zmmB, anyptr_gpC, 0);
  e->vshufi32x4(ymmA, ymmB, ymmC, 0);
  e->vshufi32x4(ymmA, ymmB, anyptr_gpC, 0);
  e->vshufi32x4(zmmA, zmmB, zmmC, 0);
  e->vshufi32x4(zmmA, zmmB, anyptr_gpC, 0);
  e->vshufi64x2(ymmA, ymmB, ymmC, 0);
  e->vshufi64x2(ymmA, ymmB, anyptr_gpC, 0);
  e->vshufi64x2(zmmA, zmmB, zmmC, 0);
  e->vshufi64x2(zmmA, zmmB, anyptr_gpC, 0);
  e->vshufpd(xmmA, xmmB, xmmC, 0);
  e->vshufpd(xmmA, xmmB, anyptr_gpC, 0);
  e->vshufpd(ymmA, ymmB, ymmC, 0);
  e->vshufpd(ymmA, ymmB, anyptr_gpC, 0);
  e->vshufpd(zmmA, zmmB, zmmC, 0);
  e->vshufpd(zmmA, zmmB, anyptr_gpC, 0);
  e->vshufps(xmmA, xmmB, xmmC, 0);
  e->vshufps(xmmA, xmmB, anyptr_gpC, 0);
  e->vshufps(ymmA, ymmB, ymmC, 0);
  e->vshufps(ymmA, ymmB, anyptr_gpC, 0);
  e->vshufps(zmmA, zmmB, zmmC, 0);
  e->vshufps(zmmA, zmmB, anyptr_gpC, 0);
  e->vsqrtpd(xmmA, xmmB);
  e->vsqrtpd(xmmA, anyptr_gpB);
  e->vsqrtpd(ymmA, ymmB);
  e->vsqrtpd(ymmA, anyptr_gpB);
  e->vsqrtpd(zmmA, zmmB);
  e->vsqrtpd(zmmA, anyptr_gpB);
  e->vsqrtps(xmmA, xmmB);
  e->vsqrtps(xmmA, anyptr_gpB);
  e->vsqrtps(ymmA, ymmB);
  e->vsqrtps(ymmA, anyptr_gpB);
  e->vsqrtps(zmmA, zmmB);
  e->vsqrtps(zmmA, anyptr_gpB);
  e->vsqrtsd(xmmA, xmmB, xmmC);
  e->vsqrtsd(xmmA, xmmB, anyptr_gpC);
  e->vsqrtss(xmmA, xmmB, xmmC);
  e->vsqrtss(xmmA, xmmB, anyptr_gpC);
  e->vsubpd(xmmA, xmmB, xmmC);
  e->vsubpd(xmmA, xmmB, anyptr_gpC);
  e->vsubpd(ymmA, ymmB, ymmC);
  e->vsubpd(ymmA, ymmB, anyptr_gpC);
  e->vsubpd(zmmA, zmmB, zmmC);
  e->vsubpd(zmmA, zmmB, anyptr_gpC);
  e->vsubps(xmmA, xmmB, xmmC);
  e->vsubps(xmmA, xmmB, anyptr_gpC);
  e->vsubps(ymmA, ymmB, ymmC);
  e->vsubps(ymmA, ymmB, anyptr_gpC);
  e->vsubps(zmmA, zmmB, zmmC);
  e->vsubps(zmmA, zmmB, anyptr_gpC);
  e->vsubsd(xmmA, xmmB, xmmC);
  e->vsubsd(xmmA, xmmB, anyptr_gpC);
  e->vsubss(xmmA, xmmB, xmmC);
  e->vsubss(xmmA, xmmB, anyptr_gpC);
  e->vucomisd(xmmA, xmmB);
  e->vucomisd(xmmA, anyptr_gpB);
  e->vucomiss(xmmA, xmmB);
  e->vucomiss(xmmA, anyptr_gpB);
  e->vunpckhpd(xmmA, xmmB, xmmC);
  e->vunpckhpd(xmmA, xmmB, anyptr_gpC);
  e->vunpckhpd(ymmA, ymmB, ymmC);
  e->vunpckhpd(ymmA, ymmB, anyptr_gpC);
  e->vunpckhpd(zmmA, zmmB, zmmC);
  e->vunpckhpd(zmmA, zmmB, anyptr_gpC);
  e->vunpckhps(xmmA, xmmB, xmmC);
  e->vunpckhps(xmmA, xmmB, anyptr_gpC);
  e->vunpckhps(ymmA, ymmB, ymmC);
  e->vunpckhps(ymmA, ymmB, anyptr_gpC);
  e->vunpckhps(zmmA, zmmB, zmmC);
  e->vunpckhps(zmmA, zmmB, anyptr_gpC);
  e->vunpcklpd(xmmA, xmmB, xmmC);
  e->vunpcklpd(xmmA, xmmB, anyptr_gpC);
  e->vunpcklpd(ymmA, ymmB, ymmC);
  e->vunpcklpd(ymmA, ymmB, anyptr_gpC);
  e->vunpcklpd(zmmA, zmmB, zmmC);
  e->vunpcklpd(zmmA, zmmB, anyptr_gpC);
  e->vunpcklps(xmmA, xmmB, xmmC);
  e->vunpcklps(xmmA, xmmB, anyptr_gpC);
  e->vunpcklps(ymmA, ymmB, ymmC);
  e->vunpcklps(ymmA, ymmB, anyptr_gpC);
  e->vunpcklps(zmmA, zmmB, zmmC);
  e->vunpcklps(zmmA, zmmB, anyptr_gpC);
  e->vxorpd(xmmA, xmmB, xmmC);
  e->vxorpd(xmmA, xmmB, anyptr_gpC);
  e->vxorpd(ymmA, ymmB, ymmC);
  e->vxorpd(ymmA, ymmB, anyptr_gpC);
  e->vxorpd(zmmA, zmmB, zmmC);
  e->vxorpd(zmmA, zmmB, anyptr_gpC);
  e->vxorps(xmmA, xmmB, xmmC);
  e->vxorps(xmmA, xmmB, anyptr_gpC);
  e->vxorps(ymmA, ymmB, ymmC);
  e->vxorps(ymmA, ymmB, anyptr_gpC);
  e->vxorps(zmmA, zmmB, zmmC);
  e->vxorps(zmmA, zmmB, anyptr_gpC);

  // Mark the end of the generated instruction stream with four NOPs.
  e->nop();
  e->nop();
  e->nop();
  e->nop();
}

} // {asmtest}

#endif // ASMJIT_TEST_OPCODE_H_INCLUDED
